Mercurial > repos > miller-lab > genome_diversity
changeset 14:8ae67e9fb6ff
Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BeautifulSoup.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,2014 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2010, Leonard Richardson + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
+ +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. 
+ myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. 
+ position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches 
the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. 
+ def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. 
This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "<?%s?>" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!--%s-->" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!%s>" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. 
If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." 
+ return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. 
+ # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % encodedName + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.renderContents(encoding, prettyPrint, indentContents) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + current.parent = None + current.previous = None + 
current.previousSibling = None + current.next = None + current.nextSibling = None + current = next + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.__str__(encoding, True) + + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.__str__(encoding) + elif isinstance(c, Tag): + s.append(c.__str__(encoding, prettyPrint, indentLevel)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. 
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. 
class SoupStrainer:
    """Encapsulates a number of ways of matching a markup element (tag or
    text)."""

    def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """Builds a matcher.

        `name` and each attribute value may be anything _matches()
        accepts. A string passed as `attrs` is shorthand for matching
        that CSS class. Keyword arguments are merged into the attribute
        filters.
        """
        self.name = name
        if isinstance(attrs, basestring):
            kwargs['class'] = _match_css_class(attrs)
            attrs = None
        if kwargs:
            if attrs:
                # Copy so the caller's dict is not modified.
                attrs = attrs.copy()
                attrs.update(kwargs)
            else:
                attrs = kwargs
        self.attrs = attrs
        self.text = text

    def __str__(self):
        if self.text:
            return self.text
        else:
            return "%s|%s" % (self.name, self.attrs)

    def searchTag(self, markupName=None, markupAttrs={}):
        """Returns the tag (or tag name) if it satisfies the name and
        attribute filters, otherwise None.

        `markupName` is either a Tag, or a tag name accompanied by
        `markupAttrs` (a map or a list of (key, value) pairs).
        """
        found = None
        markup = None
        if isinstance(markupName, Tag):
            markup = markupName
            markupAttrs = markup
        callFunctionWithTagData = callable(self.name) \
                                and not isinstance(markupName, Tag)

        if (not self.name) \
               or callFunctionWithTagData \
               or (markup and self._matches(markup, self.name)) \
               or (not markup and self._matches(markupName, self.name)):
            if callFunctionWithTagData:
                match = self.name(markupName, markupAttrs)
            else:
                match = True
                markupAttrMap = None
                # self.attrs is None when the strainer was built with
                # attrs=None and no keyword filters.
                for attr, matchAgainst in (self.attrs or {}).items():
                    if not markupAttrMap:
                        if hasattr(markupAttrs, 'get'):
                            markupAttrMap = markupAttrs
                        else:
                            markupAttrMap = {}
                            for k,v in markupAttrs:
                                markupAttrMap[k] = v
                    attrValue = markupAttrMap.get(attr)
                    if not self._matches(attrValue, matchAgainst):
                        match = False
                        break
            if match:
                if markup:
                    found = markup
                else:
                    found = markupName
        return found

    def search(self, markup):
        """Returns `markup` (or the matching member of it) if it
        satisfies this strainer, otherwise None."""
        #print 'looking for %s in %s' % (self, markup)
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if hasattr(markup, "__iter__") \
                and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                 isinstance(markup, basestring):
            if self._matches(markup, self.text):
                found = markup
        else:
            raise Exception("I don't know how to match against a %s" \
                  % markup.__class__)
        return found

    def _matches(self, markup, matchAgainst):
        """Tests `markup` against one criterion.

        `matchAgainst` may be True (anything non-None matches), a
        callable, a regexp object, a list-like or map of candidates, or
        a value compared for equality.
        """
        #print "Matching %s against %s" % (markup, matchAgainst)
        result = False
        if matchAgainst is True:
            result = markup is not None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
        else:
            #Custom match methods take the tag as an argument, but all
            #other ways of matching match the tag name as a string.
            if isinstance(markup, Tag):
                markup = markup.name
            if markup and not isinstance(markup, basestring):
                markup = unicode(markup)
            #Now we know that chunk is either a string, or None.
            if hasattr(matchAgainst, 'match'):
                # It's a regexp object.
                result = markup and matchAgainst.search(markup)
            elif hasattr(matchAgainst, '__iter__'): # list-like
                result = markup in matchAgainst
            elif hasattr(matchAgainst, 'items'):
                # Map-like: the markup must be one of its keys.
                result = matchAgainst.has_key(markup)
            elif matchAgainst and isinstance(markup, basestring):
                # Coerce the criterion to the markup's string type so
                # the equality test below compares like with like.
                if isinstance(markup, unicode):
                    matchAgainst = unicode(matchAgainst)
                else:
                    matchAgainst = str(matchAgainst)

            if not result:
                result = matchAgainst == markup
        return result

class ResultSet(list):
    """A ResultSet is just a list that keeps track of the SoupStrainer
    that created it."""
    def __init__(self, source):
        list.__init__(self)
        self.source = source

# Now, some helper functions.

def buildTagMap(default, *args):
    """Turns a list of maps, lists, or scalars into a single map.
    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
    NESTING_RESET_TAGS maps out of lists and partial maps."""
    built = {}
    for portion in args:
        if hasattr(portion, 'items'):
            #It's a map. Merge it.
            for k,v in portion.items():
                built[k] = v
        elif hasattr(portion, '__iter__'): # is a list
            #It's a list. Map each item to the default.
            for k in portion:
                built[k] = default
        else:
            #It's a scalar. Map it to the default.
            built[portion] = default
    return built

# Now, the parser classes.

class BeautifulStoneSoup(Tag, SGMLParser):

    """This class contains the basic parser and search code. It defines
    a parser that knows nothing about tag behavior except for the
    following:

      You can't close a tag without closing all the tags it encloses.
      That is, "<foo><bar></foo>" actually means
      "<foo><bar></bar></foo>".

    [Another possible explanation is "<foo><bar /></foo>", but since
    this class defines no SELF_CLOSING_TAGS, it will never use that
    explanation.]

    This class is useful for parsing XML or made-up markup languages,
    or when BeautifulSoup makes an assumption counter to what you were
    expecting."""

    SELF_CLOSING_TAGS = {}
    NESTABLE_TAGS = {}
    RESET_NESTING_TAGS = {}
    QUOTE_TAGS = {}
    PRESERVE_WHITESPACE_TAGS = []

    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
                       lambda x: x.group(1) + ' />'),
                      (re.compile('<!\s+([^<>]*)>'),
                       lambda x: '<!' + x.group(1) + '>')
                      ]

    ROOT_TAG_NAME = u'[document]'

    HTML_ENTITIES = "html"
    XML_ENTITIES = "xml"
    XHTML_ENTITIES = "xhtml"
    # TODO: This only exists for backwards-compatibility
    ALL_ENTITIES = XHTML_ENTITIES

    # Used when determining whether a text node is all whitespace and
    # can be replaced with a single space. A text node that contains
    # fancy Unicode spaces (usually non-breaking) should be left
    # alone.
+ STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + sgmllib will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + sgmllib, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke sgmllib: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. 
+ self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + SGMLParser.__init__(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed + + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. 
+ markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not hasattr(self.markupMassage, "__iter__"): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.reset() + + SGMLParser.feed(self, markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = 
[] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. 
If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: + <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'. + <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'. + <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'. + + <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. + <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' + <td><tr><td> *<td>* should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers is not None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers is None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. 
+ popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print "</%s> is not real!" 
% name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". 
When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. 
For instance, the occurance of + a <p> tag should implicitly close the previous <p> tag. + + <p>Para1<p>Para2 + should be transformed into: + <p>Para1</p><p>Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a <blockquote> tag should _not_ implicitly close the previous + <blockquote> tag. + + Alice said: <blockquote>Bob said: <blockquote>Blah + should NOT be transformed into: + Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a <tr> tag should + implicitly close the previous <tr> tag within the same <table>, + but not close a <tr> tag in another table. + + <table><tr>Blah<tr>Blah + should be transformed into: + <table><tr>Blah</tr><tr>Blah + but, + <tr>Blah<table><tr>Blah + should NOT be transformed into + <tr>Blah<table></tr><tr>Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. 
+ NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. 
+ match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. 
+ + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + <script> tags contain Javascript and should not be parsed, that + META tags may contain encoding information, and so on. + + This also makes it better for subclassing than BeautifulStoneSoup + or BeautifulSoup.""" + + RESET_NESTING_TAGS = buildTagMap('noscript') + NESTABLE_TAGS = {} + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + <foo><bar>baz</bar></foo> + => + <foo bar="baz"><bar>baz</bar></foo> + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. 
Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisiness, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class RobustInsanelyWackAssHTMLParser(MinimalSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +###################################################### +# +# Bonus library: Unicode, Dammit +# +# This class forces XML data into a standard format (usually to UTF-8 +# or Unicode). It is heavily based on code from Mark Pilgrim's +# Universal Feed Parser. It does not rewrite the XML or HTML to +# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi +# (XML) and BeautifulSoup.start_meta (HTML). + +# Autodetects character encodings. +# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. 
+try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, orig): + """Changes a MS smart quote character to an XML or HTML + entity.""" + sub = self.MS_CHARS.get(orig) + if isinstance(sub, tuple): + if self.smartQuotesTo == 'xml': + sub = '&#x%s;' % sub[1] + else: + sub = '&%s;' % 
sub[0] + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + markup = re.compile("([\x80-\x9f])").sub \ + (lambda(x): self._subMSChar(x.group(1)), + markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif 
(len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_match = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + if not xml_encoding_match and isHTML: + regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 
'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + 
'\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify()
#!/usr/bin/env python
"""LocationFile.py -- read delimited "location" files into a key -> values map.

All failures are reported through die(), which prints a message to stderr
and terminates the process with exit status 1.
"""

import sys


def die(message):
    """Write *message* (plus a newline) to stderr and exit with status 1."""
    sys.stderr.write('{0}\n'.format(message))
    sys.exit(1)


def open_or_die(filename, mode='r', message=None):
    """Open *filename* in *mode* and return the file object.

    If the open fails with IOError the process is terminated via die().
    *message* is the prefix of the error text; it defaults to
    'Error opening <filename>'.
    """
    if message is None:
        message = 'Error opening {0}'.format(filename)
    try:
        return open(filename, mode)
    except IOError as err:
        die('{0}: {1}'.format(message, err.strerror))


class LocationFile(object):
    """In-memory view of a delimited file, indexed by one of its columns.

    Parameters:
        filename      -- path of the file to load (read immediately).
        comment_chars -- iterable of line prefixes that mark a comment line;
                         defaults to ('#',).
        delimiter     -- column separator, tab by default.
        key_column    -- zero-based index of the column used as the key.
    """

    def __init__(self, filename, comment_chars=None, delimiter='\t', key_column=0):
        self.filename = filename
        if comment_chars is None:
            # Always store a tuple so the attribute has one consistent type
            # (str.startswith accepts a tuple of prefixes).
            self.comment_chars = ('#',)
        else:
            self.comment_chars = tuple(comment_chars)
        self.delimiter = delimiter
        self.key_column = key_column
        self._map = {}
        self._populate_map()

    def _populate_map(self):
        """Parse the file into self._map; die() on any inconsistency.

        A key may appear more than once only if every occurrence carries
        exactly the same remaining columns.
        """
        try:
            with open(self.filename) as fh:
                for line_number, line in enumerate(fh, 1):
                    line = line.rstrip('\r\n')
                    if line.startswith(self.comment_chars):
                        continue
                    elems = line.split(self.delimiter)
                    if len(elems) <= self.key_column:
                        die('Location file {0} line {1}: less than {2} columns'.format(self.filename, line_number, self.key_column + 1))
                    # The key is removed from the row; what is left are the values.
                    key = elems.pop(self.key_column)
                    if key not in self._map:
                        self._map[key] = elems
                    elif self._map[key] != elems:
                        die('Location file {0} line {1}: duplicate key "{2}"'.format(self.filename, line_number, key))
        except IOError as err:
            die('Error opening location file {0}: {1}'.format(self.filename, err.strerror))

    @staticmethod
    def _unwrap(values):
        """Return the sole element of a one-element list, else the list itself."""
        if len(values) == 1:
            return values[0]
        return values

    def get_values(self, key):
        """Return the value(s) stored for *key*; die() if the key is unknown.

        A row with exactly one value column yields that string; any other
        row yields the list of its value columns.
        """
        if key not in self._map:
            die('key "{0}" not found in location file {1}'.format(key, self.filename))
        return self._unwrap(self._map[key])

    def get_values_if_exists(self, key):
        """Like get_values(), but return None when *key* is unknown."""
        if key not in self._map:
            return None
        return self._unwrap(self._map[key])
# OrderedDict.py
# http://code.activestate.com/recipes/576693/
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
# Passes Python2.7's test suite and incorporates all the latest updates.

# get_ident supplies the second half of the (id(self), thread) key that
# __repr__ uses to detect a repr() already in progress on the same object.
try:
    from thread import get_ident as _get_ident
except ImportError:
    from dummy_thread import get_ident as _get_ident

# The view classes are referenced only by viewkeys/viewvalues/viewitems.
# When this import fails those three methods raise NameError if called;
# everything else keeps working.
try:
    from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
    pass


class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        # Only build the sentinel and the key->link map when they do not exist
        # yet, so calling __init__ again on a live instance keeps its links.
        try:
            self.__root
        except AttributeError:
            self.__root = root = []                     # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        # Walk the NEXT pointers from the sentinel until we come back to it.
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        # Same walk as __iter__, but following the PREV pointers.
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        # The AttributeError guard covers an instance whose __root/__map
        # attributes have not been created; the dict part is cleared regardless.
        try:
            for node in self.__map.itervalues():
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            # Unlink the node just before the sentinel (most recently added).
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            # Unlink the node just after the sentinel (oldest).
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    # NOTE(review): the instance is taken from args[0] instead of a named
    # 'self' parameter -- presumably so that 'self' stays usable as a keyword
    # key in kwds; confirm against the upstream recipe.
    def update(*args, **kwds):
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        '''
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    # Unique sentinel that lets pop() tell "no default given" apart from
    # an explicit default of None.
    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    # _repr_running is a deliberately shared mutable default: it records the
    # (object id, thread id) pairs whose repr is currently being computed so
    # that a self-referential dictionary prints '...' instead of recursing.
    def __repr__(self, _repr_running={}):
        'od.__repr__() <==> repr(od)'
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop the attributes every fresh OrderedDict has (the linked-list
        # bookkeeping); only extra instance attributes are pickled as state.
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)
#!/usr/bin/env python
"""Population.py -- individuals (keyed by column) and ordered groups of them."""

try:
    # Available in the standard library from Python 2.7 on.
    from collections import OrderedDict
except ImportError:
    # Older interpreters: use the backport bundled next to this module.
    from OrderedDict import OrderedDict


class Individual(object):
    """One individual: a column identifier, a real name and an optional alias.

    Equality and hashing consider the column and the real name only; the
    alias is a display override and does not take part in identity.
    """

    __slots__ = ['_column', '_name', '_alias']

    def __init__(self, column, name, alias=None):
        self._column = column
        self._name = name
        self._alias = alias

    @property
    def column(self):
        """Column identifier, exactly as supplied to the constructor."""
        return self._column

    @property
    def name(self):
        """Display name: the alias when one is set, otherwise the real name."""
        return self._name if self._alias is None else self._alias

    @property
    def alias(self):
        """Alias, or None when none has been set."""
        return self._alias

    @alias.setter
    def alias(self, alias):
        self._alias = alias

    @property
    def real_name(self):
        """Real name, ignoring any alias."""
        return self._name

    def __eq__(self, other):
        # Let Python fall back to its default for foreign types instead of
        # failing with AttributeError on the missing private attributes.
        if not isinstance(other, Individual):
            return NotImplemented
        return self._column == other._column and self._name == other._name

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Must agree with __eq__: same column and real name -> same hash.
        return hash((self._column, self._name))

    def __repr__(self):
        return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)


class Population(object):
    """Insertion-ordered collection of Individual objects keyed by column."""

    def __init__(self, name=None):
        self._columns = OrderedDict()
        self._name = name

    @property
    def name(self):
        """Optional label of this population."""
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    def add_individual(self, individual, alias=None):
        """Add *individual* under its column.

        Re-adding an equal individual (same column and real name) is a
        silent no-op; the stored entry, including its alias, is kept.
        *alias* is accepted for backward compatibility and is not used.

        Raises:
            ValueError: the column is already taken by a different individual.
        """
        if individual.column not in self._columns:
            self._columns[individual.column] = individual
        elif self._columns[individual.column] != individual:
            # Raising a plain string is itself an error on Python 2.6+.
            raise ValueError('Duplicate column: {0}'.format(individual))

    def is_superset(self, other):
        """Return True if every individual of *other* is also in this population."""
        for column, other_individual in other._columns.items():
            our_individual = self._columns.get(column)
            if our_individual is None or our_individual != other_individual:
                return False
        return True

    def is_disjoint(self, other):
        """Return True if no individual is shared with *other*."""
        for column, our_individual in self._columns.items():
            other_individual = other._columns.get(column)
            if other_individual is not None and other_individual == our_individual:
                return False
        return True

    def column_list(self):
        """Return the columns as a list, in insertion order."""
        return list(self._columns)

    def individual_with_column(self, column):
        """Return the individual stored under *column*, or None."""
        return self._columns.get(column)

    def _entries(self, delimiter, replace_names_with=None):
        """Build the 'column<delimiter>value' strings shared by tag_list/to_string."""
        entries = []
        for column, individual in self._columns.items():
            value = individual.name
            if replace_names_with is not None:
                value = replace_names_with
            entries.append('{0}{1}{2}'.format(column, delimiter, value))
        return entries

    def tag_list(self, delimiter=':'):
        """Return a list of 'column<delimiter>name' strings in insertion order."""
        return self._entries(delimiter)

    def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
        """Return the tags joined by *separator*.

        When *replace_names_with* is not None it is used in place of every
        individual's display name.
        """
        return separator.join(self._entries(delimiter, replace_names_with))

    def __str__(self):
        return self.to_string()

    def from_population_file(self, filename):
        """Add the individuals listed in a tab-separated file.

        Each line holds ``column<TAB>name<TAB>alias``.  The alias may be
        empty, and the alias column may be missing altogether.

        Raises:
            ValueError: a line does not have two or three fields, or a
                column clashes with an existing, different individual.
        """
        with open(filename) as fh:
            for line in fh:
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) == 2:
                    # No alias column at all: same as an empty alias.
                    fields.append('')
                if len(fields) != 3:
                    raise ValueError('Expected 2 or 3 tab-separated fields, got {0}: {1!r}'.format(len(fields), line))
                column, name, alias = fields
                alias = alias.strip()
                individual = Individual(column, name)
                if alias:
                    individual.alias = alias
                self.add_individual(individual)

    def from_tag_list(self, tag_list):
        """Add one individual per 'column:name' tag.

        Only the first colon separates the fields, so names may themselves
        contain colons.
        """
        for tag in tag_list:
            column, name = tag.split(':', 1)
            individual = Individual(column, name)
            self.add_individual(individual)

    def individual_names(self):
        """Yield the display name of each individual in insertion order."""
        for individual in self._columns.values():
            yield individual.name
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,13 @@ +Source code for the executables needed by these tools can be found in +the genome_diversity directory. + +Additionally, you'll need the following python modules: + matplotlib (we used version 1.1.0) http://pypi.python.org/packages/source/m/matplotlib/ + mechanize (we used version 0.2.5) http://pypi.python.org/packages/source/m/mechanize/ + networkx (we used version 1.6) http://pypi.python.org/packages/source/n/networkx/ + +And the following software: + ADMIXTURE (we used version 1.22) http://www.genetics.ucla.edu/software/admixture/ + EIGENSOFT (we used version 3.0) http://genepath.med.harvard.edu/~reich/Software.htm + PHAST (we used version 1.2.1) http://compgen.bscb.cornell.edu/phast/ + QuickTree (we used version 1.1) http://www.sanger.ac.uk/resources/software/quicktree/
#!/usr/bin/env python
"""add_fst_column.py -- validate two populations and run ``Fst_column``.

The external program's standard output is written to the OUTPUT file and
its exit status becomes the exit status of this script.
"""

# <command interpreter="python">
#     add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output"
#     #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
#         #set $arg = '%s:%s' % ($individual_col, $individual)
#         "$arg"
#     #end for
# </command>

import sys
import subprocess
from Population import Population

################################################################################

USAGE = ('Usage: add_fst_column.py INPUT P1_INPUT P2_INPUT DATA_SOURCE MIN_READS '
         'MIN_QUAL RETAIN DISCARD_FIXED BIASED OUTPUT COLUMN:NAME [COLUMN:NAME ...]')


def _die(message):
    """Print *message* to stderr and exit with status 1."""
    sys.stderr.write('{0}\n'.format(message))
    sys.exit(1)


def _load_population(filename, p_total, number):
    """Read a population file and check that the SNP table contains all of it.

    *number* is only used to label the population in the error message.
    """
    population = Population()
    population.from_population_file(filename)
    if not p_total.is_superset(population):
        _die('There is an individual in population {0} that is not in the SNP table'.format(number))
    return population


def main(argv):
    """Run the tool for the given argument vector; return the child's exit status."""
    # Ten fixed arguments plus at least one COLUMN:NAME tag.
    if len(argv) < 12:
        _die(USAGE)

    (snp_table, p1_input, p2_input, genotypes, min_reads, min_qual,
     retain, discard_fixed, biased, output) = argv[1:11]
    individual_metadata = argv[11:]

    # Every individual of the SNP table, as declared by the dataset metadata.
    p_total = Population()
    p_total.from_tag_list(individual_metadata)

    p1 = _load_population(p1_input, p_total, 1)
    p2 = _load_population(p2_input, p_total, 2)

    ############################################################################

    args = ['Fst_column', snp_table, genotypes, min_reads, min_qual,
            retain, discard_fixed, biased]
    # Each column is passed as COLUMN:POPULATION_NUMBER.
    args.extend('{0}:1'.format(column) for column in p1.column_list())
    args.extend('{0}:2'.format(column) for column in p2.column_list())

    #print "args:", ' '.join(args)
    # The with-block closes the output file even if the program cannot start.
    with open(output, 'w') as fh:
        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
        return p.wait()


if __name__ == '__main__':
    sys.exit(main(sys.argv))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/add_fst_column.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,91 @@ +<tool id="gd_add_fst_column" name="Per-SNP FSTs" version="1.0.0"> + <description>: Compute a fixation index score for each SNP</description> + + <command interpreter="python"> + add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output" + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="SNP table" /> + <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" /> + <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" /> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" /> + <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" /> + + <param name="retain" type="select" label="Special treatment"> + <option value="0" selected="true">Skip row</option> + <option value="1">Set FST = -1</option> + </param> + + <param name="discard_fixed" type="select" label="Apparently fixed SNPs"> + <option value="0">Retain SNPs that appear fixed in the two populations</option> + <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option> + </param> + + <param name="biased" type="select" label="FST estimator"> + <option value="0" selected="true">Wright's original definition</option> + <option value="1">Weir's unbiased estimator</option> + </param> + + </inputs> 
+ + <outputs> + <data name="output" format="gd_snp" metadata_source="input" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> + <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" /> + <param name="data_source" value="0" /> + <param name="min_reads" value="3" /> + <param name="min_qual" value="0" /> + <param name="retain" value="0" /> + <param name="discard_fixed" value="1" /> + <param name="biased" value="0" /> + <output name="output" file="test_out/add_fst_column/add_fst_column.gd_snp" /> + </test> + </tests> + + <help> + +**What it does** + +The user specifies a SNP table and two "populations" of individuals, +both previously defined using the Specify Individuals tool. +No individual can be in both populations. Other choices are as follows. + +Data source. The allele frequencies of a SNP in the two populations can be +estimated either by the total number of reads of each allele, or by adding +the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on amount +of data required at a SNP. For estimating the Fst using read counts, +the bound is the minimum count of reads of the two alleles in a population. +For estimations based on genotype, the bound is the minimum reported genotype +quality per individual. + +The user specifies whether the SNPs that violate the lower bound should be +ignored or the Fst set to -1. + +The user specifies whether SNPs where both populations appear to be fixed +for the same allele should be retained or discarded. + +Finally, the user chooses which definition of Fst to use: Wright's original +definition or Weir's unbiased estimator. + +A column is appended to the SNP table giving the Fst for each retained SNP. + + </help> +</tool>
--- a/aggregate_gd_indivs.xml Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -<tool id="gd_sum_gd_snp" name="Aggregate Individuals" version="1.0.0"> - <description>: Append summary columns for a population</description> - - <command interpreter="python"> - modify_snp_table.py "$input" "$p1_input" "$output" "-1" "-1" "-1" "-1" - #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) - #set $arg = '%s:%s' % ($individual_col, $individual) - "$arg" - #end for - </command> - - <inputs> - <param name="input" type="data" format="gd_snp" label="SNP dataset" /> - <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> - </inputs> - - <outputs> - <data name="output" format="gd_snp" metadata_source="input" /> - </outputs> - - <tests> - <test> - <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> - <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> - <param name="choice" value="1" /> - <param name="lo_coverage" value="0" /> - <param name="hi_coverage" value="1000" /> - <param name="low_ind_cov" value="3" /> - <param name="lo_quality" value="30" /> - <output name="output" file="test_out/modify_snp_table/modify.gd_snp" /> - </test> - </tests> - - <help> - -**Dataset formats** - -The input datasets are in gd_snp_ and gd_indivs_ formats. -The output dataset is in gd_snp_ format. (`Dataset missing?`_) - -.. _gd_snp: ./static/formatHelp.html#gd_snp -.. _gd_indivs: ./static/formatHelp.html#gd_indivs -.. _Dataset missing?: ./static/formatHelp.html - ------ - -**What it does** - -The user specifies that some of the individuals in a gd_snp dataset form a -"population", by supplying a list that has been previously created using the -Specify Individuals tool. 
The program appends a -new "entity" (set of four columns) to the gd_snp table, analogous to the columns -for an individual but containing summary data for the population as a group. -These four columns give the total counts for the two alleles, the "genotype" for -the population, and the maximum quality value, taken over all individuals in the -population. If all defined genotypes in the population are 2 (agree with the -reference), then the population's genotype is 2, and similarly for 0; otherwise -the genotype is 1 (unless all individuals have undefined genotype, in which case -it is -1). - ------ - -**Example** - -- input gd_snp:: - - Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 - Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 - Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 - etc. - -- input individuals:: - - 9 PB1 - 13 PB2 - 17 PB3 - -- output:: - - Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 29 0 2 72 - Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 3 0 2 30 - Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 13 0 2 42 - etc. - - </help> -</tool>
#!/usr/bin/env python
"""average_fst.py -- validate the populations and run ``Fst_ave``.

The external program's standard output is written to the OUTPUT file and
its exit status becomes the exit status of this script.
"""

import sys
import subprocess
from Population import Population

################################################################################

USAGE = ('Usage: average_fst.py INPUT P1_INPUT P2_INPUT DATA_SOURCE MIN_TOTAL_COUNT '
         'DISCARD_FIXED BIASED OUTPUT SHUFFLES P0_INPUT COLUMN:NAME [COLUMN:NAME ...]')


def _die(message):
    """Print *message* to stderr and exit with status 1."""
    sys.stderr.write('{0}\n'.format(message))
    sys.exit(1)


def _load_population(filename, p_total, number):
    """Read a population file and check that the SNP table contains all of it.

    *number* is only used to label the population in the error message.
    """
    population = Population()
    population.from_population_file(filename)
    if not p_total.is_superset(population):
        _die('There is an individual in population {0} that is not in the SNP table'.format(number))
    return population


def main(argv):
    """Run the tool for the given argument vector; return the child's exit status."""
    # Ten fixed arguments plus at least one COLUMN:NAME tag.
    if len(argv) < 12:
        _die(USAGE)

    (snp_table, p1_input, p2_input, data_source, min_total_count,
     discard_fixed, biased, output, shuffles, p0_input) = argv[1:11]
    individual_metadata = argv[11:]

    # A shuffle count that is not an integer means "no randomization".
    try:
        shuffle_count = int(shuffles)
    except ValueError:
        shuffle_count = 0

    # Every individual of the SNP table, as declared by the dataset metadata.
    p_total = Population()
    p_total.from_tag_list(individual_metadata)

    p1 = _load_population(p1_input, p_total, 1)
    p2 = _load_population(p2_input, p_total, 2)

    # Population 0 (extra individuals for randomization) is read only when
    # shuffling was requested; otherwise P0_INPUT is never opened.
    p0 = None
    if shuffle_count > 0:
        p0 = _load_population(p0_input, p_total, 0)

    ############################################################################

    args = ['Fst_ave', snp_table, data_source, min_total_count,
            discard_fixed, biased, shuffles]
    # Each column is passed as COLUMN:POPULATION_NUMBER.
    args.extend('{0}:1'.format(column) for column in p1.column_list())
    args.extend('{0}:2'.format(column) for column in p2.column_list())
    if p0 is not None:
        args.extend('{0}:0'.format(column) for column in p0.column_list())

    #print "args:", ' '.join(args)
    # The with-block closes the output file even if the program cannot start.
    with open(output, 'w') as fh:
        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
        return p.wait()


if __name__ == '__main__':
    sys.exit(main(sys.argv))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/average_fst.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,113 @@ +<tool id="gd_average_fst" name="Overall FST" version="1.0.0"> + <description>: Estimate the relative fixation index between two populations</description> + + <command interpreter="python"> + average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$biased" "$output" + #if $use_randomization.ur_choice == '1' + "$use_randomization.shuffles" "$use_randomization.p0_input" + #else + "0" "/dev/null" + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="SNP table" /> + <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" /> + <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" /> + + <conditional name="data_source"> + <param name="ds_choice" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage and ..</option> + <option value="1">estimated genotype and ..</option> + </param> + <when value="0"> + <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" /> + </when> + <when value="1"> + <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality" /> + </when> + </conditional> + + <param name="discard_fixed" type="select" label="Apparently fixed SNPs"> + <option value="0">Retain SNPs that appear fixed in the two populations</option> + <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option> + </param> + + <param name="biased" type="select" label="FST estimator"> + <option value="0" selected="true">Wright's original 
definition</option> + <option value="1">Weir's unbiased estimator</option> + </param> + + <conditional name="use_randomization"> + <param name="ur_choice" type="select" format="integer" label="Use randomization"> + <option value="0" selected="true">No</option> + <option value="1">Yes</option> + </param> + <when value="0" /> + <when value="1"> + <param name="shuffles" type="integer" min="0" value="0" label="Shuffles" /> + <param name="p0_input" type="data" format="gd_indivs" label="Individuals for randomization" /> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="txt" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> + <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" /> + <param name="ds_choice" value="0" /> + <param name="min_value" value="3" /> + <param name="discard_fixed" value="1" /> + <param name="biased" value="0" /> + <param name="ur_choice" value="0" /> + <output name="output" file="test_out/average_fst/average_fst.txt" /> + </test> + </tests> + + <help> + +**What it does** + +The user specifies a SNP table and two "populations" of individuals, +both previously defined using the Specify Individuals tool. +No individual can be in both populations. Other choices are as follows. + +Data source. The allele frequencies of a SNP in the two populations can be +estimated either by the total number of reads of each allele, or by adding +the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on amount +of data required at a SNP. For estimating the Fst using read counts, +the bound is the minimum count of reads of the two alleles in a population. +For estimations based on genotype, the bound is the minimum reported genotype +quality per individual. SNPs not meeting these lower bounds are ignored. 
+ +The user specifies whether SNPs where both populations appear to be fixed +for the same allele should be retained or discarded. + +The user chooses which definition of Fst to use: Wright's original definition +or Weir's unbiased estimator. + +Finally, the user decides whether to use randomizations. If so, then the +user specifies how many randomly generated population pairs (retaining +the numbers of individuals of the originals) to generate, as well as the +"population" of additional individuals (not in the first two populations) +that can be used in the randomization process. + +The program prints the average Fst for the original populations and the +number of SNPs used to compute it. If randomizations were requested, +it prints the average Fst for each randomly generated population pair, +ending with a summary that includes the maximum and average value, and the +highest-scoring population pair. + + </help> +</tool>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       calclenchange.py
#
#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.

import argparse,mechanize,os,sys
from decimal import Decimal,getcontext
from xml.etree.ElementTree import ElementTree,tostring
import networkx as nx
from copy import copy

#method to rank the pathways by mut. freq.
def rankdN(ltfreqs):
    """Rank (change, original, mutated, pathway) tuples from highest to lowest.

    ltfreqs -- iterable of 4-tuples.  The tuples are sorted and then visited
               from the largest to the smallest.

    Returns a list of [change, original, mutated, rank, pathway] lists; the
    first four items are converted with str(), the pathway is passed through
    unchanged.  Consecutive entries with an equal change share a rank; when
    the change differs, the rank becomes the entry's 1-based position
    (1, 1, 3, ...).  An empty input yields an empty list.

    When the original value is 'C' the change is replaced before ranking:
    'C-<mutated>' if the mutated value is not 'C', Decimal('0') otherwise.
    NOTE(review): the meaning of the 'C' marker is not visible in this
    function -- confirm against the caller.
    """
    outrnk=[]
    prevChng=None#change of the previously ranked entry
    crank=0
    #walk the sorted values from the end, i.e. highest value first
    for tmpChng,tmpOri,tmpMut,tmpPthw in reversed(sorted(ltfreqs)):
        if tmpOri=='C':
            if tmpMut!='C':
                tmpChng='C-%s'%tmpMut
            else:
                tmpChng=Decimal('0')
        #the first entry always opens rank 1; later ones only on a new change
        if not outrnk or tmpChng!=prevChng:
            crank=len(outrnk)+1
            prevChng=tmpChng
        outrnk.append([str(tmpChng),str(tmpOri),str(tmpMut),str(crank),tmpPthw])
    return outrnk

#method to rank the pathways by mut. freq.
+def rankdAvr(ltfreqs): + ordvals=sorted(ltfreqs)#sort and reverse freqs. + #~ + outrnk={} + tmpChng0,tmpOri,tmpMut,tmpPthw=ordvals.pop()#the highest possible value + if tmpOri=='I': + if tmpMut!='I': + tmpChng0='I-%s'%tmpMut + else: + tmpChng0=Decimal('0') + crank=1 + outrnk[tmpPthw]='\t'.join([str(tmpChng0),str(tmpOri),str(tmpMut),str(crank)]) + totalnvals=len(ordvals) + cnt=0 + while totalnvals>cnt: + cnt+=1 + tmpChng,tmpOri,tmpMut,tmpPthw=ordvals.pop() + if tmpOri=='I': + if tmpMut!='I': + tmpChng='I-%s'%tmpMut + else: + tmpChng=Decimal('0') + if tmpChng!=tmpChng0: + crank=len(outrnk)+1 + tmpChng0=tmpChng + outrnk[tmpPthw]='\t'.join([str(tmpChng),str(tmpOri),str(tmpMut),str(crank)]) + return outrnk + +#this method takes as input a list of pairs of edges(beginNod,endNod) and returns a list of nodes with indegree 0 and outdegree 0 +def returnstartanendnodes(edges): + listID0st=set()#starts + listOD0en=set()#end + for beginNod,endNod in edges:# O(n) + listID0st.add(beginNod) + listOD0en.add(endNod) + startNdsID0=listID0st.difference(listOD0en) + endNdsOD0=listOD0en.difference(listID0st) + return startNdsID0,endNdsOD0 + +#~ Method to return nodes and edges +def returnNodesNEdgesfKXML(fpthwKGXML): + #~ + tree = ElementTree() + ptree=tree.parse(fpthwKGXML) + #~ + title=ptree.get('title') + prots=ptree.findall('entry') + reactns=ptree.findall('reaction') + #~ + edges,ndstmp=set(),set() + nreactns=len(reactns) + cr=0#count reacts + while nreactns>cr: + cr+=1 + reactn=reactns.pop() + mainid=reactn.get('id') + ndstmp.add(mainid)#add node + reacttyp=reactn.get('type') + sbstrts=reactn.findall('substrate') + while len(sbstrts)>0: + csbstrt=sbstrts.pop() + csbtsid=csbstrt.get('id') + ndstmp.add(csbtsid)#add node + if reacttyp=='irreversible': + edges.add((csbtsid,mainid))#add edges + elif reacttyp=='reversible': + edges.add((mainid,csbtsid))#add edges + edges.add((csbtsid,mainid))#add edges + #~ + prdcts=reactn.findall('product') + while len(prdcts)>0: + prdct=prdcts.pop() 
+ prodctid=prdct.get('id') + ndstmp.add(prodctid)#add node + if reacttyp=='irreversible': + edges.add((mainid,prodctid))#add edges + elif reacttyp=='reversible': + edges.add((mainid,prodctid))#add edges + edges.add((prodctid,mainid))#add edges + #~ Nodes + nprots=len(prots) + cp=0#count prots + dnodes={} + while nprots>cp: + cp+=1 + prot=prots.pop() + tmpProtnm=prot.get('id') + if tmpProtnm in ndstmp: + dnodes[prot.get('id')]=set(prot.get('name').split())#each genename for each Id + return dnodes,edges,title + +#~ make calculation on pathways +def rtrnAvrgLen(edges,strNds,endNds): + wG=nx.DiGraph()#reference graph + wG.add_edges_from(edges) + dPairsSrcSnks=nx.all_pairs_shortest_path_length(wG)#dictionary between sources and sink and length + nstartNdsID0=len(strNds) + cstrtNds=0 + nPaths=0 + lPathLen=[] + while nstartNdsID0>cstrtNds: + cStartNd=strNds.pop()#current start node + dEndNdsLen=dPairsSrcSnks.pop(cStartNd) + for cendNd in dEndNdsLen: + if cendNd in endNds: + lPathLen.append(dEndNdsLen[cendNd]) + nPaths+=1 + cstrtNds+=1 + AvrgPthLen=0 + if nPaths!=0: + AvrgPthLen=Decimal(sum(lPathLen))/Decimal(str(nPaths)) + return nPaths,AvrgPthLen + +def main(): + parser = argparse.ArgumentParser(description='Rank pathways based on the change in length and number of paths connecting sources and sinks.') + parser.add_argument('--loc_file',metavar='correlational database',type=str,help='correlational database') + parser.add_argument('--species',metavar='species name',type=str,help='the species of interest in loc_file') + parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. Column 1 is the diference between column 2 and column 3, Column 2 is the pathway average length (between sources and sinks) including the genes in the input list, Column 3 is the pathway average length EXCLUDING the genes in the input list, Column 4 is the rank based on column 1. 
Column 5 is the diference between column 6 and column 7, Column 6 is the number of paths between sources and sinks, including the genes in the input list, Column 7 is the number of paths between sources and sinks EXCLUDING the genes in the input list, Column 8 is the rank based on column 5. Column 9 I the pathway name' ) + parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name') + parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code') + parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format') + #~ + #~Open arguments + class C(object): + pass + fulargs=C() + parser.parse_args(sys.argv[1:],namespace=fulargs) + #test input vars + inputf,loc_file,species,output,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.loc_file,fulargs.species,fulargs.output,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn + posKEGGclmn-=1#correct pos + Kgeneposcolmn-=1 + #~ Get the extra variables + crDB=[x.split() for x in open(loc_file).read().splitlines() if x.split()[0]==species][0] + sppPrefx,dinput=crDB[1],crDB[2] + #~ set decimal positions + getcontext().prec = 3 + #make a dictionary of valid genes + dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()]) + sdGenes=set([x for x in dKEGGcPthws.keys() if x.find('.')>-1]) + while True:#to crrect names with more than one gene + try: + mgenes=sdGenes.pop() + pthwsAssotd=dKEGGcPthws.pop(mgenes) + mgenes=mgenes.split('.') + for eachg in mgenes: + dKEGGcPthws[eachg]=pthwsAssotd + except: + break + #~ + lPthwsF=[x for x in os.listdir(dinput) if x.find('.xml')>-1 if x not in ['cfa04070.xml']] + nPthws=len(lPthwsF) + cPthw=0 + lPthwPthN=[]#the output list for number of paths + lPthwPthAvr=[]#the output list for the length of paths + #~ + while 
cPthw<nPthws: + cPthw+=1 + KEGGpathw=lPthwsF.pop() + comdKEGGpathw=KEGGpathw.split('.')[0] + tmpddGenrcgenPresent=set() + sKEGGc=dKEGGcPthws.keys() + lsKEGGc=len(sKEGGc) + ctPthw=0 + while ctPthw < lsKEGGc:#to save memory + eachK=sKEGGc.pop() + alPthws=dKEGGcPthws[eachK] + if comdKEGGpathw in alPthws: + tmpddGenrcgenPresent.add(':'.join([sppPrefx,eachK])) + ctPthw+=1 + #~ Make graph calculations + dnodes,edges,title=returnNodesNEdgesfKXML(open(os.path.join(dinput,KEGGpathw))) + startNdsID0,endNdsOD0=returnstartanendnodes(edges) + startNdsOri=copy(startNdsID0) + #~ + nPaths='C'#stands for circuit + AvrgPthLen='I'#stand for infinite + if len(startNdsID0)>0 and len(endNdsOD0)>0: + nPaths,AvrgPthLen=rtrnAvrgLen(edges,startNdsID0,endNdsOD0) + #~ work with the genes in the list + genestodel=set() + lnodes=len(dnodes) + sNds=set(dnodes) + ctPthw=0 + while ctPthw<lnodes: + ctPthw+=1 + cNod=sNds.pop() + sgenes=dnodes.pop(cNod) + if len(sgenes.intersection(tmpddGenrcgenPresent))==len(sgenes): + genestodel.add(cNod) + #~ del nodes from graph edges + wnPaths,wAvrgPthLen=copy(nPaths),copy(AvrgPthLen) + if len(genestodel)>0: + wedges=set([x for x in edges if len(set(x).intersection(genestodel))==0]) + wstartNds,wendNds=returnstartanendnodes(wedges) + if nPaths!='C': + wstartNds=[x for x in wstartNds if x in startNdsOri] + wendNds=[x for x in wendNds if x in endNdsOD0] + if len(wstartNds)>0 and len(wendNds)>0: + wnPaths,wAvrgPthLen=rtrnAvrgLen(wedges,wstartNds,wendNds) + #~ Calculate the differences + orNP,mutNP,oriLen,mutLen=nPaths,wnPaths,AvrgPthLen,wAvrgPthLen + if nPaths=='C': + orNP=Decimal('1000') + oriLen=Decimal('1000') + if wnPaths=='C': + mutNP=Decimal('1000') + mutLen=Decimal('1000') + lPthwPthN.append([orNP-mutNP,nPaths,wnPaths,'='.join([comdKEGGpathw,title])])#print nPaths,AvrgPthLen + lPthwPthAvr.append([oriLen-mutLen,AvrgPthLen,wAvrgPthLen,'='.join([comdKEGGpathw,title])])#print nPaths,AvrgPthLen + doutrnkPthN=rankdN(lPthwPthN) + doutrnkPthAvr=rankdAvr(lPthwPthAvr) 
+ #~ + sall=['\t'.join([doutrnkPthAvr[x[4]],'\t'.join(x)]) for x in doutrnkPthN] + salef=open(output,'w') + salef.write('\n'.join(sall)) + salef.close() + return 0 + + +if __name__ == '__main__': + main() +
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       calcfreq.py
#
#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.

import argparse
import os
import sys
from decimal import Decimal, getcontext
from LocationFile import LocationFile


#method to rank the pathways by mut. freq.
def rankd(ltfreqs):
    """Rank (frequency, count, pathway) tuples by frequency, highest first.

    Returns a list of tab-joined strings "count<TAB>freq<TAB>rank<TAB>pathway".
    Entries with the same frequency share a rank; the next distinct frequency
    gets its 1-based position as rank.  An empty input gives an empty list.
    """
    outrnk = []
    previous = None
    crank = 1
    for tmpFreq, tmpCount, tmpPthw in reversed(sorted(ltfreqs)):
        if not outrnk or tmpFreq != previous:
            crank = len(outrnk) + 1
            previous = tmpFreq
        outrnk.append('\t'.join([str(tmpCount), str(tmpFreq), str(crank), tmpPthw]))
    return outrnk


def _read_gene_pathways(inputf, Kgeneposcolmn, posKEGGclmn):
    """Return {gene code: set of '.'-separated pathway items} from the input
    table, skipping the header line and blank lines.  A gene column holding
    several '.'-separated genes is expanded so every gene maps to the same
    pathway set."""
    with open(inputf) as handle:
        rows = handle.read().splitlines()[1:]
    dKEGGcPthws = {}
    for row in rows:
        if not row.strip():
            continue
        fields = row.split('\t')
        dKEGGcPthws[fields[Kgeneposcolmn]] = set(fields[posKEGGclmn].split('.'))
    #to correct names with more than one gene
    for mgenes in sorted(gene for gene in dKEGGcPthws if '.' in gene):
        pthwsAssotd = dKEGGcPthws.pop(mgenes)
        for eachg in mgenes.split('.'):
            dKEGGcPthws[eachg] = pthwsAssotd
    return dKEGGcPthws


def _read_pathway_totals(dict_file):
    """Return {pathway: total gene count} from a "count<TAB>pathway" file.
    Exits with status 1 and a message on stderr if the file cannot be read."""
    dPthContsTotls = {}
    try:
        with open(dict_file) as fh:
            for line in fh:
                line = line.rstrip('\r\n')
                value, key = line.split('\t')
                dPthContsTotls[key] = int(value)
    except IOError as err:
        sys.stderr.write('Error opening dict file {0}: {1}\n'.format(dict_file, err.strerror))
        sys.exit(1)
    return dPthContsTotls


def main():
    """Count, for every pathway of the species, how many input genes belong to
    it, and write the counts, fractions and ranks to the --output file."""
    parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.')
    parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
    parser.add_argument(
        '--output', metavar='output TXT file', type=str,
        help='the output file with the table in txt format. '
             'Column 1 is the count of genes in the list, '
             'Column 2 is the percentage of the pathway genes present on the list. '
             'Column 3 is the rank based on column 2')
    parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
    parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
    parser.add_argument('--loc_file',metavar='location file',type=str,help='location file')
    parser.add_argument('--species',metavar='species',type=str,help='species')
    #~Open arguments
    fulargs = parser.parse_args(sys.argv[1:])
    inputf, outputf = fulargs.input, fulargs.output
    locf, species = fulargs.loc_file, fulargs.species
    # Column numbers are 1-based on the command line.
    posKEGGclmn = fulargs.posKEGGclmn - 1
    Kgeneposcolmn = fulargs.KEGGgeneposcolmn - 1
    #make a dictionary of valid genes
    dKEGGcPthws = _read_gene_pathways(inputf, Kgeneposcolmn, posKEGGclmn)
    #~ Count genes
    getcontext().prec = 2  # Decimal arithmetic below keeps 2 significant digits

    location_file = LocationFile(locf)
    prefix, kxml_dir_path, dict_file = location_file.get_values(species)
    dPthContsTotls = _read_pathway_totals(dict_file)

    dPthContsTmp = dict((x, 0) for x in dPthContsTotls)
    for sKEGGcPthws in dKEGGcPthws.values():
        for eachP in sKEGGcPthws:
            # 'N' is the only pathway value that is not counted; any other
            # value must be a key of the totals file (KeyError otherwise).
            if eachP != 'N':
                dPthContsTmp[eachP] += 1
    #~ Calculate Freqs.
    ltfreqs = [((Decimal(dPthContsTmp[x]) / Decimal(dPthContsTotls[x])), Decimal(dPthContsTmp[x]), x)
               for x in dPthContsTotls]
    tabllfreqs = '\n'.join(rankd(ltfreqs))
    with open(outputf, 'w') as salef:
        salef.write(tabllfreqs)
    return 0


if __name__ == '__main__':
    main()
#!/usr/bin/env python

'''
Manipulate DJB's Constant Databases. These are 2 level disk-based hash tables
that efficiently handle many keys, while remaining space-efficient.

    http://cr.yp.to/cdb.html

When generated databases are only used with Python code, consider using hash()
rather than djb_hash() for a tidy speedup.  (Only do so where hash() of a byte
string is stable between runs; interpreters that randomise string hashing
would produce databases that cannot be read back by another process.)

Keys and values are byte strings: ``str`` on Python 2, ``bytes`` on Python 3.
'''

from _struct import Struct
from itertools import chain

# Names that differ between Python 2 and Python 3.
try:
    _range = xrange
except NameError:
    _range = range

try:
    _text_type = unicode
except NameError:
    _text_type = str


def py_djb_hash(s):
    '''Return the value of DJB's hash function for the given 8-bit string.'''
    # Iterating a byte string yields 1-char strings on Python 2 but ints on
    # Python 3; normalise to integer byte values.  Text strings are hashed by
    # code point, as before.
    codes = bytearray(s) if isinstance(s, bytes) else (ord(c) for c in s)
    h = 5381
    for code in codes:
        h = (((h << 5) + h) ^ code) & 0xffffffff
    return h

try:
    from _cdblib import djb_hash
except ImportError:
    djb_hash = py_djb_hash

read_2_le4 = Struct('<LL').unpack
write_2_le4 = Struct('<LL').pack


class Reader(object):
    '''A dictionary-like object for reading a Constant Database accessed
    through a string or string-like sequence, such as mmap.mmap().'''

    def __init__(self, data, hashfn=djb_hash):
        '''Create an instance reading from a sequence and using hashfn to hash
        keys.  Raises IOError if data is shorter than the 2048-byte index.'''
        if len(data) < 2048:
            raise IOError('CDB too small')

        self.data = data
        self.hashfn = hashfn

        # 256 (table position, slot count) pairs.
        self.index = [read_2_le4(data[i:i+8]) for i in _range(0, 2048, 8)]
        # Records end where the first hash table begins.
        self.table_start = min(p[0] for p in self.index)
        # Assume load load factor is 0.5 like official CDB.
        self.length = sum(p[1] >> 1 for p in self.index)

    def iteritems(self):
        '''Like dict.iteritems(). Items are returned in insertion order.'''
        pos = 2048
        while pos < self.table_start:
            klen, dlen = read_2_le4(self.data[pos:pos+8])
            pos += 8

            key = self.data[pos:pos+klen]
            pos += klen

            data = self.data[pos:pos+dlen]
            pos += dlen

            yield key, data

    def items(self):
        '''Like dict.items().'''
        return list(self.iteritems())

    def iterkeys(self):
        '''Like dict.iterkeys().'''
        return (p[0] for p in self.iteritems())
    __iter__ = iterkeys

    def itervalues(self):
        '''Like dict.itervalues().'''
        return (p[1] for p in self.iteritems())

    def keys(self):
        '''Like dict.keys().'''
        return [p[0] for p in self.iteritems()]

    def values(self):
        '''Like dict.values().'''
        return [p[1] for p in self.iteritems()]

    def __getitem__(self, key):
        '''Like dict.__getitem__().'''
        value = self.get(key)
        if value is None:
            raise KeyError(key)
        return value

    def has_key(self, key):
        '''Return True if key exists in the database.'''
        return self.get(key) is not None
    __contains__ = has_key

    def __len__(self):
        '''Return the number of records in the database.'''
        return self.length

    def gets(self, key):
        '''Yield values for key in insertion order.'''
        # Truncate to 32 bits and remove sign.
        h = self.hashfn(key) & 0xffffffff
        start, nslots = self.index[h & 0xff]

        if nslots:
            end = start + (nslots << 3)
            slot_off = start + (((h >> 8) % nslots) << 3)

            # Probe from the home slot to the end of the table, then wrap.
            for pos in chain(_range(slot_off, end, 8),
                             _range(start, slot_off, 8)):
                rec_h, rec_pos = read_2_le4(self.data[pos:pos+8])

                if not rec_h:
                    break
                elif rec_h == h:
                    klen, dlen = read_2_le4(self.data[rec_pos:rec_pos+8])
                    rec_pos += 8

                    if self.data[rec_pos:rec_pos+klen] == key:
                        rec_pos += klen
                        yield self.data[rec_pos:rec_pos+dlen]

    def get(self, key, default=None):
        '''Get the first value for key, returning default if missing.'''
        # Avoid exception catch when handling default case; much faster.
        return next(chain(self.gets(key), (default,)))

    def getint(self, key, default=None, base=0):
        '''Get the first value for key converted it to an int, returning
        default if missing.'''
        value = self.get(key, default)
        if value is not default:
            return int(value, base)
        return value

    def getints(self, key, base=0):
        '''Yield values for key in insertion order after converting to int.'''
        return (int(v, base) for v in self.gets(key))

    def getstring(self, key, default=None, encoding='utf-8'):
        '''Get the first value for key decoded as unicode, returning default if
        not found.'''
        value = self.get(key, default)
        if value is not default:
            return value.decode(encoding)
        return value

    def getstrings(self, key, encoding='utf-8'):
        '''Yield values for key in insertion order after decoding as
        unicode.'''
        return (v.decode(encoding) for v in self.gets(key))


class Writer(object):
    '''Object for building new Constant Databases, and writing them to a
    seekable file-like object.'''

    def __init__(self, fp, hashfn=djb_hash):
        '''Create an instance writing to a file-like object, using hashfn to
        hash keys.  fp must accept byte strings (binary mode).'''
        self.fp = fp
        self.hashfn = hashfn

        # Reserve room for the index; finalize() fills it in.
        fp.write(b'\x00' * 2048)
        self._unordered = [[] for i in _range(256)]

    def put(self, key, value=b''):
        '''Write a string key/value pair to the output file.  Both must be
        byte strings.'''
        assert type(key) is bytes and type(value) is bytes

        pos = self.fp.tell()
        self.fp.write(write_2_le4(len(key), len(value)))
        self.fp.write(key)
        self.fp.write(value)

        h = self.hashfn(key) & 0xffffffff
        self._unordered[h & 0xff].append((h, pos))

    def puts(self, key, values):
        '''Write more than one value for the same key to the output file.
        Equivalent to calling put() in a loop.'''
        for value in values:
            self.put(key, value)

    def putint(self, key, value):
        '''Write an integer as a base-10 string associated with the given key
        to the output file.'''
        self.put(key, str(value).encode('ascii'))

    def putints(self, key, values):
        '''Write zero or more integers for the same key to the output file.
        Equivalent to calling putint() in a loop.'''
        self.puts(key, (str(value).encode('ascii') for value in values))

    def putstring(self, key, value, encoding='utf-8'):
        '''Write a unicode string associated with the given key to the output
        file after encoding it as UTF-8 or the given encoding.'''
        self.put(key, _text_type.encode(value, encoding))

    def putstrings(self, key, values, encoding='utf-8'):
        '''Write zero or more unicode strings to the output file. Equivalent to
        calling putstring() in a loop.'''
        self.puts(key, (_text_type.encode(value, encoding) for value in values))

    def finalize(self):
        '''Write the final hash tables to the output file, and write out its
        index. The output file remains open upon return.'''
        index = []
        for tbl in self._unordered:
            # Twice as many slots as entries (load factor 0.5).
            length = len(tbl) << 1
            ordered = [(0, 0)] * length
            for pair in tbl:
                where = (pair[0] >> 8) % length
                # Linear probing with wrap-around for a free slot.
                for i in chain(_range(where, length), _range(0, where)):
                    if not ordered[i][0]:
                        ordered[i] = pair
                        break

            index.append((self.fp.tell(), length))
            for pair in ordered:
                self.fp.write(write_2_le4(*pair))

        self.fp.seek(0)
        for pair in index:
            self.fp.write(write_2_le4(*pair))
        self.fp = None # prevent double finalize()
--- a/commits.log Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ - -:7b775e5b68b4 -cathy 2012-09-28 00:55 -Galaxy didn't like my RST syntax. :-/ - -:9b5b4f73bd98 -cathy 2012-09-28 00:08 -Tweaks by Cathy, e.g. adjusting text where renamed tools are mentioned. -Also riemerized through first section, "Initial Analysis". - -:93eeef51be96 -cathy 2012-09-27 14:03 -Fixed datatype bugs in the Filter SNPs and Aggregate Individuals tools. - -:119e1e904cc4 -cathy 2012-09-26 15:38 -Restored modify_snp_table.py from the archive, since it's still used by the -Filter SNPs and Aggregate Individuals tools. - -:cc508d55cc9d -cathy 2012-09-26 15:16 -Tweaked description for the Prepare Input tool. - -:cdb8430b1659 -cathy 2012-09-26 15:10 -Added ": " at the beginning of each description to separate it from the name. - -:3286bdea6b3d -cathy 2012-09-26 13:01 -Clarified tool names and descriptions. - -:8a9bdfc0d31f -cathy 2012-09-19 17:15 -Edited docs in aggregate_gd_indivs.xml: clarified "What it does", reformatted -example data. - -:f7c6a18af605 -cathy 2012-09-19 11:31 -Edited docs in specify.xml: clarified "What it does", reformatted example data. -
#!/usr/bin/env python
# coverage_distributions.py
#
# Run the external `coverage` program on a SNP table, turn its cumulative
# distributions into per-bin values, plot them with R (coverage_plot.r) and
# write an HTML summary page for the composite output.
#
# usage: coverage_distributions.py <snp table> <data source> <output html>
#        <extra files dir> <selector> [<selector> ...]
# where each selector is one of
#        all_individuals
#        individuals:<population file>
#        population:<population file>:<population name>
#        individual:<tag>

import os
import errno
import sys
import shutil
import subprocess
from Population import Population
import gd_composite

################################################################################

def mkdir_p(path):
    """Create directory `path` and any missing parents; an already existing
    path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def fail(message):
    """Write `message` to stderr and exit with status 1."""
    sys.stderr.write(message + '\n')
    sys.exit(1)

def check_exit_status(rc, args):
    """Abort when a child process finished with a non-zero exit status."""
    if rc != 0:
        fail("FAILED: rc={0}: {1}".format(rc, ' '.join(args)))

################################################################################

if len(sys.argv) < 7:
    fail("Usage")

snp_file, data_source, output, extra_files_path = sys.argv[1:5]

individual_metadata = []
population_info = []
p1_input = None
all_individuals = False

for arg in sys.argv[5:]:
    if arg == 'all_individuals':
        all_individuals = True
    elif len(arg) > 12 and arg.startswith('individuals:'):
        p1_input = arg[12:]
    elif len(arg) > 11:
        if arg.startswith('population:'):
            pop_file, pop_name = arg[11:].split(':', 1)
            population_info.append((pop_file, pop_name))
        elif arg.startswith('individual:'):
            individual_metadata.append(arg[11:])

p_total = Population()
p_total.from_tag_list(individual_metadata)

################################################################################

mkdir_p(extra_files_path)

################################################################################

prog = 'coverage'

args = []
args.append(prog)
args.append(snp_file)
args.append(data_source)

user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
args.append(user_coverage_file)

population_list = []

if all_individuals:
    tags = p_total.tag_list()
elif p1_input is not None:
    p1 = Population()
    this_pop = Population()
    this_pop.from_population_file(p1_input)
    population_list.append(this_pop)
    p1.from_population_file(p1_input)
    if not p_total.is_superset(p1):
        fail('There is an individual in the population that is not in the SNP table')
    tags = p1.tag_list()
else:
    tags = []
    for population_file, population_name in population_info:
        population = Population()
        this_pop = Population()
        this_pop.from_population_file(population_file)
        population_list.append(this_pop)
        population.from_population_file(population_file)
        if not p_total.is_superset(population):
            fail('There is an individual in the {} population that is not in the SNP table'.format(population_name))
        columns = population.column_list()
        for column in columns:
            tags.append('{0}:{1}'.format(column, population_name))

args.extend(tags)

## text output
coverage_file = 'coverage.txt'
with open(coverage_file, 'w') as fh:
    #print "args:", ' '.join(args)
    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
    rc = p.wait()
# Do not go on to parse and plot the output of a failed run.
check_exit_status(rc, args)

## graphical output
# coverage.txt holds cumulative values per row; coverage2.txt gets the
# difference between consecutive columns (the first value is kept as is).
coverage2_file = 'coverage2.txt'
with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh:
    for line in fh:
        line = line.rstrip('\r\n')
        if not line:
            continue
        elems = line.split('\t')
        name = elems.pop(0)
        values = [ elems[0] ]
        for idx in range(1, len(elems)):
            val = str(float(elems[idx]) - float(elems[idx-1]))
            values.append(val)
        ofh.write('{0}\t{1}\n'.format(name, '\t'.join(values)))

################################################################################

prog = 'R'

args = []
args.append(prog)
args.append('--vanilla')
args.append('--quiet')

_realpath = os.path.realpath(__file__)
_script_dir = os.path.dirname(_realpath)
r_script_file = os.path.join(_script_dir, 'coverage_plot.r')

# The R script is fed on stdin; its standard output is discarded.
with open(r_script_file) as ifh, open(os.devnull, 'w') as ofh:
    #print "args:", ' '.join(args)
    p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
    rc = p.wait()
check_exit_status(rc, args)

pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
shutil.copy2('coverage.pdf', pdf_file)
os.remove('coverage.pdf')
os.remove(coverage2_file)

################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('Coverage distributions Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_txt)


if data_source == '0':
    data_source_value = 'sequence coverage'
elif data_source == '1':
    data_source_value = 'estimated genotype'
else:
    # Unknown code: show it verbatim rather than failing with a NameError.
    data_source_value = data_source

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)

info_page.add_input_parameter(in_data_source)

if population_list:
    misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
    info_page.add_misc(misc_populations)
else:
    misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
    info_page.add_misc(misc_individuals)


with open(output, 'w') as ofh:
    ofh.write('{0}\n'.format(info_page.render()))


sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coverage_distributions.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,122 @@ +<tool id="gd_coverage_distributions" name="Coverage Distributions" version="1.0.0"> + <description>: Examine sequence coverage for SNPs</description> + + <command interpreter="python"> + coverage_distributions.py "$input" "0" "$output" "$output.files_path" + #if $individuals.choice == '0' + "all_individuals" + #else if $individuals.choice == '1' + #set $arg = 'individuals:%s' % str($individuals.p1_input) + "$arg" + #else if $individuals.choice == '2' + #for $population in $individuals.populations + #set $arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name)) + "$arg" + #end for + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $individual_arg = 'individual:%s:%s' % ($individual_col, $individual) + "$individual_arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="SNP dataset" /> + + <conditional name="individuals"> + <param name="choice" type="select" label="Compute for"> + <option value="0" selected="true">All individuals</option> + <option value="1">Individuals in a population</option> + <option value="2">Totals of populations</option> + </param> + <when value="0" /> + <when value="1"> + <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> + </when> + <when value="2"> + <repeat name="populations" title="Population" min="1"> + <param name="p_input" type="data" format="gd_indivs" label="individuals" /> + </repeat> + </when> + </conditional> + + <!-- + <param name="data_source" type="select" label="Data source"> + <option value="0" selected="true">Sequence coverage</option> + <option value="1">Genotype quality</option> + </param> + --> + </inputs> + + <outputs> + <data name="output" format="html" /> + </outputs> + + <tests> + <test> + 
<param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="choice" value="0" /> + <output name="output" file="test_out/coverage_distributions/coverage.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="coverage.pdf" value="test_out/coverage_distributions/coverage.pdf" compare="sim_size" delta = "1000"/> + <extra_files type="file" name="coverage.txt" value="test_out/coverage_distributions/coverage.txt" /> + </output> + </test> + </tests> + + <help> + +**Dataset formats** + +The input dataset is in gd_snp_ format. +The output is a composite dataset, containing both a text table and a PDF plot. +(`Dataset missing?`_) + +.. _gd_snp: ./static/formatHelp.html#gd_snp +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +This tool reports distributions of a SNP reliability indicator, in this case +sequence coverage, for individuals or populations. +The coverage can be computed for all individuals, a subset of individuals, +or totals for populations defined by the Specify Individuals tool. +The results are reported as a text table giving the cumulative distributions, +and as a plot. + +----- + +**Examples** + +- input:: + + chr1 14929 A G 999 21 30 1 127 7 11 1 28 7 29 0 5 2 5 1 17 10 14 1 81 17 74 1 42 15 22 1 125 29 84 1 88 6 10 1 11 30 23 1 79 19 1 2 71 24 0 2 99 41 10 2 2 + chr1 17451 C T 6.88 119 1 2 255 12 0 2 63 35 0 2 59 14 0 2 72 19 1 2 57 101 1 2 255 38 8 1 20 125 0 2 255 13 0 2 62 42 0 2 51 44 0 2 64 26 0 2 108 59 0 2 194 + chr1 30922 G T 999 0 23 0 66 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 2 0 3 0 14 0 39 14 16 1 153 0 45 0 132 6 0 2 48 19 0 2 87 3 0 2 32 0 0 -1 0 0 0 -1 0 + etc. 
+ +- text output:: + + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + John West 0 0 0 0 0 0 0 0 1 1 1 1 2 2 3 3 4 4 5 6 + NA12892 0 2 5 11 20 31 43 55 67 77 84 90 93 96 97 98 99 99 99 99 + NA12891 0 0 0 0 0 1 1 2 3 5 6 9 11 15 19 23 29 35 41 47 + NA12249 1 4 11 23 38 54 68 79 88 93 96 98 99 99 99 99 99 99 99 99 + NA12342 0 0 1 1 2 4 6 9 13 18 23 29 36 43 50 58 65 71 77 82 + KB1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 2 2 + ABT 0 0 0 0 0 0 1 1 1 2 3 4 5 6 8 10 12 14 18 21 + NA18507 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + NA19238 0 0 0 1 2 4 6 10 14 19 25 32 39 47 55 62 69 76 81 86 + NA19239 0 0 0 0 1 1 2 4 5 8 11 15 19 24 31 37 44 51 58 65 + YH 2 4 6 7 8 8 9 10 11 12 14 17 19 22 25 29 32 36 40 45 + KOREAN 0 0 1 1 3 4 5 7 10 12 15 19 22 27 31 37 42 48 54 60 + JPT 0 0 0 0 0 0 0 0 1 1 1 2 2 3 4 5 7 8 10 12 + etc. + +graphical output: + +.. image:: ${static_path}/images/gd_coverage.png + + </help> +</tool>
# coverage_plot.r
#
# Plot one line per individual/population from 'coverage2.txt' (in the
# current directory) and write the figure to 'coverage.pdf'.
#
# Input layout, as read below: tab-separated, first line skipped, first
# column holds the row name, remaining columns hold the values for
# coverage 0, 1, 2, ...

x <- read.table('coverage2.txt', skip=1, sep='\t')

# One row per individual; the first column is the name.
individuals <- dim(x)[1]
# Number of columns minus the name column, minus one because the x axis
# starts at 0.
max_cov <- dim(x)[2] - 2
# Values are divided by 100 before plotting (axis label: "Proportion").
max_val <- max(x[-1]) / 100
colors <- rainbow(individuals)

line_width = 3
# Transposed table: column i of xt is row i of x, name first.
xt = t(x)

xvals <- c(0:max_cov)
# First individual: drop the name, convert the remaining cells to numbers.
values <- as.numeric(as.vector(xt[,1][-1]))/100

pdf(file='coverage.pdf', onefile=TRUE, width=10, height=6);

plot(xvals, values, type='l', ylim=c(0, max_val), xlim=c(0, max_cov), col=colors[1], lwd=line_width, xlab="Coverage", ylab="Proportion")

# Remaining individuals are added to the same plot, one colour each.
if (individuals > 1) {
    for (i in 2:individuals) {
        values <- as.numeric(as.vector(xt[,i][-1]))/100;
        lines(xvals, values, col=colors[i], lwd=line_width);
    }
}


# Legend entries are the row names taken from the first column.
names <- as.vector(t(x[1]))
legend(x='topright', legend=names, fill=colors, bty='n')

dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="wsf.py"/> + </datatype_files> + <registration> + <datatype extension="gd_indivs" type="galaxy.datatypes.wsf:Individuals" display_in_upload="true"/> + <datatype extension="gd_ped" type="galaxy.datatypes.wsf:Wped" display_in_upload="true"/> + <datatype extension="gd_snp" type="galaxy.datatypes.wsf:GDSnp" display_in_upload="true"/> + <datatype extension="gd_sap" type="galaxy.datatypes.wsf:GDSap" display_in_upload="true"/> + </registration> + <sniffers/> +</datatypes>
#!/usr/bin/env python
# dpmix.py
#
# Run the external `dpmix` program for two ancestral populations and one
# potentially admixed population, plot the result and write an HTML summary
# page for the composite output.

import errno
import sys
import os
import subprocess
from Population import Population
import gd_composite
from dpmix_plot import make_dpmix_plot
from LocationFile import LocationFile

################################################################################

def mkdir_p(path):
    """Create directory `path` and any missing parents; an already existing
    path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def run_program(prog, args, stdout_file=None, space_to_tab=False):
    """Run `args` as a child process and exit with status 1 if it fails.

    prog         -- passed to Popen as `executable`; None means args[0].
    stdout_file  -- if given, the child's non-blank stdout lines are written
                    to this path, stripped of surrounding whitespace.
    space_to_tab -- replace every space in those lines with a tab.

    On a non-zero exit status the command line and the child's stderr are
    written to stderr before exiting.
    """
    #print "args: ", ' '.join(args)
    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (stdoutdata, stderrdata) = p.communicate()
    rc = p.returncode

    if stdout_file is not None:
        with open(stdout_file, 'w') as ofh:
            for line in stdoutdata.split('\n'):
                line = line.strip()
                if line:
                    if space_to_tab:
                        line = line.replace(' ', '\t')
                    ofh.write(line + '\n')

    if rc != 0:
        sys.stderr.write("FAILED: rc={0}: {1}\n".format(rc, ' '.join(args)))
        sys.stderr.write('{0}\n'.format(stderrdata))
        sys.exit(1)

def fail(message):
    """Write `message` to stderr and exit with status 1."""
    sys.stderr.write(message + '\n')
    sys.exit(1)

################################################################################

if len(sys.argv) < 15:
    # Reported on stderr, like every other error of this script.
    fail("usage")

snp_file, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:14]
individual_metadata = sys.argv[14:]

chrom = 'all'
add_logs = '0'

loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
location_file = LocationFile(loc_path)
heterochrom_path = location_file.get_values_if_exists(dbkey)
if heterochrom_path is None:
    # No entry for this build: hand the program an empty file.
    heterochrom_path = os.devnull

population_list = []

p_total = Population()
p_total.from_tag_list(individual_metadata)

ap1 = Population(name='Ancestral population 1')
ap1.from_population_file(ap1_input)
population_list.append(ap1)
if not p_total.is_superset(ap1):
    fail('There is an individual in ancestral population 1 that is not in the SNP table')

ap2 = Population(name='Ancestral population 2')
ap2.from_population_file(ap2_input)
population_list.append(ap2)
if not p_total.is_superset(ap2):
    fail('There is an individual in ancestral population 2 that is not in the SNP table')

p = Population(name='Potentially admixed')
p.from_population_file(p_input)
population_list.append(p)
if not p_total.is_superset(p):
    fail('There is an individual in the population that is not in the SNP table')

mkdir_p(output2_dir)

################################################################################
# Create tabular file
################################################################################

misc_file = os.path.join(output2_dir, 'misc.txt')

prog = 'dpmix'
args = [ prog ]
args.append(snp_file)
args.append(ref_column)
args.append(chrom)
args.append(data_source)
args.append(add_logs)
args.append(switch_penalty)
args.append(heterochrom_path)
args.append(misc_file)

# One "<column>:<group>:<name>" argument per individual; the group is 1 or 2
# for the ancestral populations and 0 for the potentially admixed one.
for group, population in (('1', ap1), ('2', ap2), ('0', p)):
    for column in population.column_list():
        args.append('{0}:{1}:{2}'.format(column, group, population.individual_with_column(column).name))

run_program(None, args, stdout_file=output, space_to_tab=True)

################################################################################
# Create pdf file
################################################################################

pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)

################################################################################
# Create html
################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('dpmix Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_misc)

if data_source == '0':
    data_source_value = 'sequence coverage'
elif data_source == '1':
    data_source_value = 'estimated genotype'
else:
    # Unknown code: show it verbatim rather than failing with a NameError.
    data_source_value = data_source

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)

info_page.add_input_parameter(in_data_source)
info_page.add_input_parameter(in_switch_penalty)

misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())

info_page.add_misc(misc_populations)

with open(output2, 'w') as ofh:
    ofh.write('{0}\n'.format(info_page.render()))

sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dpmix.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,93 @@ +<tool id="gd_dpmix" name="Admixture" version="1.0.0"> + <description>: Map genomic intervals resembling specified ancestral populations</description> + + <command interpreter="python"> + dpmix.py "$input" "$data_source" "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" "gd.heterochromatic.loc" + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="Dataset"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + <param name="ap1_input" type="data" format="gd_indivs" label="Ancestral population 1 individuals" /> + <param name="ap2_input" type="data" format="gd_indivs" label="Ancestral population 2 individuals" /> + <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" /> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="switch_penalty" type="integer" min="0" value="10" label="Switch penalty" /> + </inputs> + + <outputs> + <data name="output" format="tabular" /> + <data name="output2" format="html" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="ap1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> + <param name="ap2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" /> + <param name="p_input" value="test_in/c.gd_indivs" 
ftype="gd_indivs" /> + <param name="data_source" value="0" /> + <param name="switch_penalty" value="10" /> + + <output name="output" file="test_out/dpmix/dpmix.tabular" /> + + <output name="output2" file="test_out/dpmix/dpmix.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="dpmix.pdf" value="test_out/dpmix/dpmix.pdf" compare="sim_size" delta = "10000" /> + <extra_files type="file" name="misc.txt" value="test_out/dpmix/misc.txt" /> + </output> + </test> + </tests> + + <help> + +**Dataset formats** + +The input datasets are in gd_snp_ and gd_indivs_ formats. It is important for +the Individuals datasets to have unique names and for there to be no overlap +between the two populations. Rename these datasets if +needed to make them unique. +There are two output datasets, one tabular_ and one composite. (`Dataset missing?`_) + +.. _gd_snp: ./static/formatHelp.html#gd_snp +.. _gd_indivs: ./static/formatHelp.html#gd_indivs +.. _tabular: ./static/formatHelp.html#tab +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user specifies two "ancestral" populations (i.e., sources for +chromosomes) and a set of potentially admixed individuals, and chooses +between the sequence coverage or the estimated genotypes to measure +the similarity of genomic intervals in admixed individuals to the two +classes of ancestral chromosomes. The user also picks a "switch penalty", +typically between 10 and 100. For each potentially admixed individual, +the program divides the genome into three "genotypes": (0) homozygous +for the first ancestral population (i.e., both chromosomes from that +population), (1) heterozygous, or (2) homozygous for the second ancestral +population. Parts of a chromosome that are labeled as "heterochromatic" +are given the non-genotype, 3. 
Smaller values of the switch penalty +(corresponding to more ancient admixture events) generally lead to the +reconstruction of more frequent changes between genotypes. + +There are two output datasets generated. A tabular dataset with chromosome, +start, stop, and pairs of columns containing the "genotypes" from above +and label from the admixed individual. The second dataset is a composite +dataset with general information from the run and a link to a pdf which +graphically shows the ancestral population along each of the chromosomes. +The second link is to a text file with summary information of the +"genotypes" over the whole genome. + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dpmix_plot.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,297 @@ +#!/usr/bin/env python + +import os +import sys +import math +import matplotlib as mpl +mpl.use('PDF') +import matplotlib.pyplot as plt +from matplotlib.path import Path +import matplotlib.patches as patches + +################################################################################ + +def build_chrom_len_dict(dbkey, galaxy_data_index_dir): + chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom') + chrom_len_file = '{0}.len'.format(dbkey) + chrom_len_path = os.path.join(chrom_len_root, chrom_len_file) + + chrom_len = {} + + try: + with open(chrom_len_path) as fh: + for line in fh: + line = line.rstrip('\r\n') + elems = line.split() + if len(elems) == 2: + chrom = elems[0] + length = int(elems[1]) + chrom_len[chrom] = length + except: + pass + + return chrom_len + +def parse_input_file(input_file): + chroms = [] + individuals = [] + data = {} + chrom_len = {} + + with open(input_file) as fh: + for line in fh: + line = line.strip() + if line: + elems = line.split() + chrom = elems[0] + p1, p2, state = map(int, elems[1:4]) + id = elems[4] + + if chrom not in chroms: + chroms.append(chrom) + + if id not in individuals: + individuals.append(id) + + data.setdefault(chrom, {}) + data[chrom].setdefault(id, []) + data[chrom][id].append((p1, p2, state)) + + if p2 > chrom_len.setdefault(chrom, 0): + chrom_len[chrom] = p2 + + return chroms, individuals, data, chrom_len + +def check_chroms(chroms, chrom_len, dbkey): + error = 0 + for chrom in chroms: + if chrom not in chrom_len: + print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom) + error = 1 + if error: + sys.exit(1) + +def check_data(data, chrom_len, dbkey): + error = 0 + for chrom in data: + chrom_beg = 0 + chrom_end = chrom_len[chrom] + for individual in data[chrom]: + for p1, p2, state in data[chrom][individual]: + if p1 >= p2: + print >> sys.stderr, 
"Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual) + error = 1 + if p1 < chrom_beg or p2 > chrom_end: + print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual) + error = 1 + if error: + sys.exit(1) + +def make_rectangle(p1, p2, color, bottom=0.0, top=1.0): + verts = [ + (p1, bottom), # left, bottom + (p1, top), # left, top + (p2, top), # right, top + (p2, bottom), # right, bottom + (0.0, 0.0) # ignored + ] + + codes = [ + Path.MOVETO, + Path.LINETO, + Path.LINETO, + Path.LINETO, + Path.CLOSEPOLY + ] + + path = Path(verts, codes) + return patches.PathPatch(path, facecolor=color, lw=0) + +def make_split_rectangle(p1, p2, top_color, bottom_color): + patch1 = make_rectangle(p1, p2, bottom_color, top=0.5) + patch2 = make_rectangle(p1, p2, top_color, bottom=0.5) + return [patch1, patch2] + +def make_state_rectangle(p1, p2, state, chrom, individual): + if state == 0: + return [ make_rectangle(p1, p2, 'r') ] + elif state == 1: + return make_split_rectangle(p1, p2, 'r', 'g') + elif state == 2: + return [ make_rectangle(p1, p2, 'g') ] + elif state == 3: + return [ make_rectangle(p1, p2, '#c7c7c7') ] + else: + print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual) + sys.exit(1) + +def nicenum(num, round=False): + if num == 0: + return 0.0 + + exp = int(math.floor(math.log10(num))) + f = num / math.pow(10, exp) + + if round: + if f < 1.5: + nf = 1.0 + elif f < 3.0: + nf = 2.0 + elif f < 7.0: + nf = 5.0 + else: + nf = 10.0 + else: + if f <= 1.0: + nf = 1.0 + elif f <= 2.0: + nf = 2.0 + elif f <= 5.0: + nf = 5.0 + else: + nf = 10.0 + + return nf * pow(10, exp) + +def tick_foo(beg, end, loose=False): + ntick = 10 + + range = nicenum(end - beg, round=False) + d = nicenum(range/(ntick - 1), round=True) + digits = int(math.floor(math.log10(d))) + + if loose: + graph_min = math.floor(beg/d) * d 
+ graph_max = math.ceil(end/d) * d + else: + graph_min = beg + graph_max = end + + nfrac = max([-1 * digits, 0]) + vals = [] + + stop = graph_max + if loose: + stop = graph_max + (0.5 * d) + + x = graph_min + while x <= stop: + vals.append(int(x)) + x += d + + vals = vals[1:] + +# if not loose: +# if vals[-1] < graph_max: +# vals.append(int(graph_max)) + + labels = [] + for val in vals: + labels.append('{0}'.format(int(val/math.pow(10, digits)))) + +# labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits))) + + return vals, labels + +################################################################################ + +def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir): + fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir) + chroms, individuals, data, chrom_len = parse_input_file(input_file) + + for chrom in chrom_len.keys(): + if chrom in fs_chrom_len: + chrom_len[chrom] = fs_chrom_len[chrom] + + #check_chroms(chroms, chrom_len, input_dbkey) + check_data(data, chrom_len, input_dbkey) + + ## units below are inches + top_space = 0.10 + chrom_space = 0.25 + chrom_height = 0.25 + ind_space = 0.10 + ind_height = 0.25 + + total_height = 0.0 + at_top = True + for chrom in chroms: + if at_top: + total_height += (top_space + chrom_height) + at_top = False + else: + total_height += (top_space + chrom_space + chrom_height) + + individual_count = 0 + for individual in individuals: + if individual in data[chrom]: + individual_count += 1 + total_height += individual_count * (ind_space + ind_height) + + width = 7.5 + height = math.ceil(total_height) + + bottom = 1.0 + + fig = plt.figure(figsize=(width, height)) + + at_top = True + for_webb = False + + for chrom in chroms: + length = chrom_len[chrom] + vals, labels = tick_foo(0, length) + + if at_top: + bottom -= (top_space + chrom_height)/height + at_top = False + else: + bottom -= (top_space + chrom_space + chrom_height)/height + + if not for_webb: + ax = fig.add_axes([0.0, 
bottom, 1.0, chrom_height/height]) + plt.axis('off') + plt.text(0.5, 0.5, chrom, fontsize=14, ha='center') + + individual_count = 0 + for individual in individuals: + if individual in data[chrom]: + individual_count += 1 + + i = 0 + for individual in individuals: + if individual in data[chrom]: + i += 1 + + bottom -= (ind_space + ind_height)/height + if not for_webb: + # [left, bottom, width, height] + ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height]) + plt.axis('off') + plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center') + # [left, bottom, width, height] + ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False) + ax2.set_xlim(0, length) + ax2.set_ylim(0, 1) + if i != individual_count: + plt.axis('off') + else: + if not for_webb: + ax2.tick_params(top=False, left=False, right=False, labelleft=False) + ax2.set_xticks(vals) + ax2.set_xticklabels(labels) + else: + plt.axis('off') + for p1, p2, state in sorted(data[chrom][individual]): + for patch in make_state_rectangle(p1, p2, state, chrom, individual): + ax2.add_patch(patch) + + plt.savefig(output_file) + +################################################################################ + +if __name__ == '__main__': + input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5] + make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir) + sys.exit(0) +
#!/usr/bin/env bash
# echo.bash: append each individual argument to the output file, one per line.
#
# Usage: echo.bash <input> <output> <individual> [<individual> ...]
#
# <input> is accepted for a uniform command line but is not read.

if [ $# -lt 3 ]; then
    echo "usage"
    exit 1
fi

input="$1"      # unused; kept so the positional layout stays documented
output="$2"
shift 2

# printf instead of echo: echo would treat an argument such as "-n" or "-e"
# as an option and print nothing for it.  printf reuses the format for every
# remaining argument, so this writes exactly one line per individual.
printf '%s\n' "$@" >> "$output"

exit 0
#!/usr/bin/env bash
# evaluate_population_numbers.bash: run admixture with --cv for every
# population count from 1 up to the given maximum and collect the
# cross-validation lines.
#
# Usage: evaluate_population_numbers.bash <input_ped_file> <output_file> <max_populations>

if [ $# -ne 3 ]; then
    echo "usage"
    exit 1
fi

ped_file="$1"
cve_report="$2"
k_max="$3"

ADMIXTURE=admixture

for (( k = 1; $k <= $k_max; k++ )); do
    # Keep only the lines mentioning CV and shorten "CV error" to "CVE";
    # results for successive k are appended to the same report.
    $ADMIXTURE --cv "$ped_file" $k 2>&1 \
        | grep CV \
        | perl -ne 's/CV error/CVE/; print;' >> "$cve_report"
done
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/evaluate_population_numbers.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,83 @@ +<tool id="gd_evaluate_population_numbers" name="Population Complexity" version="1.0.0"> + <description>: Evaluate possible numbers of ancestral populations</description> + + <command interpreter="bash"> + evaluate_population_numbers.bash "${input.extra_files_path}/admix.ped" "$output" "$max_populations" + </command> + + <inputs> + <param name="input" type="data" format="gd_ped" label="Dataset" /> + <param name="max_populations" type="integer" min="1" value="5" label="Maximum number of populations" /> + </inputs> + + <outputs> + <data name="output" format="txt" /> + </outputs> + + <!-- + <tests> + <test> + <param name="input" value="fake" ftype="gd_ped" > + <metadata name="base_name" value="admix" /> + <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" /> + <composite_data value="test_out/prepare_population_structure/admix.ped" /> + <composite_data value="test_out/prepare_population_structure/admix.map" /> + <edit_attributes type="name" value="fake" /> + </param> + <param name="max_populations" value="2" /> + + <output name="output" file="test_out/evaluate_population_numbers/evaluate_population_numbers.txt" /> + </test> + </tests> + --> + + <help> + +**Dataset formats** + +The input dataset is in gd_ped_ format. +The output dataset is text. (`Dataset missing?`_) + +.. _gd_ped: ./static/formatHelp.html#gd_ped +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user selects a gd_ped dataset generated by the Prepare Input tool. +For all possible numbers K of ancestral +populations, from 1 up to a user-specified maximum, this tool produces values +that indicate how well the data can be explained as genotypes from individuals +derived from K ancestral populations. 
These values are computed by a 5-fold +cross-validation procedure, so that a good choice for K will exhibit a low +cross-validation error (CVE) compared with other potential settings for K. + +----- + +**Acknowledgments** + +We use the program "Admixture", downloaded from + +http://www.genetics.ucla.edu/software/admixture/ + +and described in the paper "Fast model-based estimation of ancestry in +unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange, +Genome Research 19 (2009), pp. 1655-1664. Admixture is called with the "--cv" +flag to produce these values. + +----- + +**Example** + +- output with max populations of 6:: + + CVE (K=1): 1.10120 + CVE (K=2): 1.34683 + CVE (K=3): 1.80611 + CVE (K=4): 1.96339 + CVE (K=5): 1.21522 + CVE (K=6): 0.51501 + + </help> +</tool>
#!/usr/bin/env python
"""extract_flanking_dna.py: write the flanking DNA for each selected SNP.

For every SNP in ``--input`` the (scaffold, position) pair is looked up in
the snpcalls file registered for ``--species`` in the ``--snps_loc`` file,
and whatever ``SnpcallsFile.get_flanking_dna`` returns for the requested
``--output_format`` is appended to ``--output``.
"""

import os
import sys
import traceback
from optparse import OptionParser
import genome_diversity as gd

# Options that must be present (and truthy), in the order they are checked.
REQUIRED_OPTIONS = (
    'input',
    'output',
    'snps_loc',
    'scaffold_col',
    'pos_col',
    'output_format',
    'species',
)


def main_function(parse_arguments=None):
    """Build a decorator that turns ``f(options, arguments)`` into a script main.

    The decorated function parses ``arguments`` (default ``sys.argv``) with
    ``parse_arguments``, calls the wrapped function and always finishes with
    ``sys.exit``: with the wrapped function's return value on success, or
    with 1 after printing the error and its traceback to stderr on failure.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: (None, arguments)

    def main_decorator(to_decorate):
        def decorated_main(arguments=None):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments(arguments)
            rc = 1
            try:
                rc = to_decorate(options, arguments)
            except Exception as err:
                sys.stderr.write('ERROR: %s\n' % str(err))
                # Needs the module-level ``import traceback``; without it
                # this line raised NameError and no traceback was shown.
                traceback.print_exc()
            finally:
                sys.exit(rc)
        return decorated_main
    return main_decorator


def parse_arguments(arguments):
    """Parse ``arguments[1:]`` and return optparse's ``(options, args)`` pair."""
    parser = OptionParser()
    parser.add_option('--input',
                      type='string', dest='input',
                      help='file of selected SNPs')
    parser.add_option('--output',
                      type='string', dest='output',
                      help='output file')
    parser.add_option('--snps_loc',
                      type='string', dest='snps_loc',
                      help='snps .loc file')
    parser.add_option('--scaffold_col',
                      type="int", dest='scaffold_col',
                      help='scaffold column in the input file')
    parser.add_option('--pos_col',
                      type="int", dest='pos_col',
                      help='position column in the input file')
    parser.add_option('--output_format',
                      type="string", dest='output_format',
                      help='output format, fasta or primer3')
    parser.add_option('--species',
                      type="string", dest='species',
                      help='species')
    return parser.parse_args(arguments[1:])


@main_function(parse_arguments)
def main(options, arguments):
    """Look up and write the flanking DNA of every SNP in the input file.

    Raises RuntimeError naming the first required option that is missing.
    """
    for name in REQUIRED_OPTIONS:
        if not getattr(options, name):
            raise RuntimeError('missing --%s option' % name)

    snps = gd.SnpFile(filename=options.input,
                      seq_col=int(options.scaffold_col),
                      pos_col=int(options.pos_col))

    out_fh = gd._openfile(options.output, 'w')
    try:
        snpcalls_file = gd.get_filename_from_loc(options.species, options.snps_loc)
        # The index is expected next to the data file, with a .cdb extension.
        file_root, file_ext = os.path.splitext(snpcalls_file)
        snpcalls_index_file = file_root + ".cdb"
        snpcalls = gd.SnpcallsFile(data_file=snpcalls_file,
                                   index_file=snpcalls_index_file)

        while snps.next():
            seq, pos = snps.get_seq_pos()
            flanking_dna = snpcalls.get_flanking_dna(sequence=seq, position=pos,
                                                     format=options.output_format)
            if flanking_dna:
                out_fh.write(flanking_dna)
    finally:
        # Close (and flush) the output even when a lookup fails part-way.
        out_fh.close()


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_flanking_dna.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,95 @@ +<tool id="gd_extract_flanking_dna" name="Flanking Sequence" version="1.0.0"> + <description>: Fetch DNA sequence for intervals surrounding the given SNPs</description> + + <command interpreter="python"> + extract_flanking_dna.py "--input=$input" "--output=$output" "--snps_loc=${GALAXY_DATA_INDEX_DIR}/gd.snps.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + "--output_format=$output_format" + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <param name="output_format" type="select" format="integer" label="output format"> + <option value="fasta" selected="true">FastA format</option> + <option value="primer3">Primer3 input</option> + </param> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="txt" name="output"/> + </outputs> + + <!-- Need snpcalls files from Webb before uncommenting + <tests> + <test> + <param name="input" value="test_out/select_snps/select_snps.gd_snp" 
ftype="gd_snp" /> + <param name="output_format" value="primer3" /> + <param name="choice" value="0" /> + <output name="output" file="test_out/extract_flanking_dna/extract_flanking_dna.txt" /> + </test> + </tests> + --> + + <help> + +**What it does** + + This tool reports a DNA segment containing each SNP, with up to 200 nucleotides on + either side of the SNP position, which is indicated by "n". Fewer nucleotides + are reported if the SNP is near an end of the assembled genome fragment. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. 
+ +- output file:: + + > chr2_75111355_75112576 314 A C + TATCTTCATTTTTATTATAGACTCTCTGAACCAATTTGCCCTGAGGCAGACTTTTTAAAGTACTGTGTAATGTATGAAGTCCTTCTGCTCAAGCAAATCATTGGCATGAAAACAGTTGCAAACTTATTGTGAGAGAAGAGTCCAAGAGTTTTAACAGTCTGTAAGTATATAGCCTGTGAGTTTGATTTCCTTCTTGTTTTTnTTCCAGAAACATGATCAGGGGCAAGTTCTATTGGATATAGTCTTCAAGCATCTTGATTTGACTGAGCGTGACTATTTTGGTTTGCAGTTGACTGACGATTCCACTGATAACCCAGTAAGTTTAAGCTGTTGTCTTTCATTGTCATTGCAATTTTTCTGTCTTTATACTAGGTCCTTTCTGATTTACATTGTTCACTGATT + > chr8_93901796_93905612 2471 A C + GCTGCCGCTGGATTTACTTCTGCTTGGGTCGAGAGCGGGCTGGATGGGTGAAGAGTGGGCTCCCCGGCCCCTGACCAGGCAGGTGCAGACAAGTCGGAAGAAGGCCCGCCGCATCTCCTTGCTGGCCAGCGTGTAGATGACGGGGTTCATGGCAGAGTTGAGCACGGCCAGCACGATGAACCACTGGGCCTTGAACAGGATnGCGCACTCCTTCACCTTGCAGGCCACATCCACAAGGAAAAGGATGAAGAGTGGGGACCAGCAGGCGATGAACACGCTCACCACGATCACCACGGTCCGCAGCAGGGCCATGGACCGCTCTGAGTTGTGCGGGCTGGCCACCCTGCGGCTGCTGGACTTCACCAGGAAGTAGATGCGTGCGTACAGGATCACGATGGTCAC + > chr10_7434473_7435447 524 T C + ATTATTAACAGAAACATTTCTTTTTCATTACCCAGGGGTTACACTGGTCGTTGATGTTAATCAGTTTTTGGAGAAGGAGAAGCAAAGTGATATTTTGTCTGTTCTGAAGCCTGCCGTTGGTAATACAAATGACGTAATCCCTGAATGTGCTGACAGGTACCATGACGCCCTGGCAAAAGCAAAAGAGCAAAAATCTAGAAGnGGTAAGCATCTTCACTGTTTAGCACAAATTAAATAGCACTTTGAATATGATGATTTCTGTGGTATTGTGTTATCTTACTTTTGAGACAAATAATCGCTTTCAAATGAATATTTCTGAATGTTTGTCATCTCTGGCAAGGAAATTTTTTAGTGTTTCTTTTCCTTTTTTGTCTTTTGGAAATCTGTGATTAACTTGGTGGC + > chr14_80021455_80022064 138 G A + ACCCAGGGATCAAACCCAGGTCTCCCGCATTGCAGGCGGATTCTTTACTGTCTGAGCCTCCAGGGAAGCCCTCGGGGCTGAAGGGATGGTTATGAAGGTGAGAAACAGGGGCCACCTGTCCCCAAGGTACCTTGCGACnTGCCATCTGCGCTCCACCAGTAAATGGACGTCTTCGATCCTTCTGTTGTTGGCGTAGTGCAAACGTTTGGGAAGGTGCTGTTTCAAGTAAGGCTTAAAGTGCTGGTCTGGTTTTTTACACTGAAATATAAATGGACATTGGATTTTGCAATGGAGAGTCTTCTAGAAGAGTCCAAGACATTCTCTCCAGAAAGCTGAAGG + > chr15_64470252_64471048 89 G A + 
TGTGTGTGTGTGTGTGTGTGTGTGCCTGTGTCTGTACATGCACACCACGTGGCCTCACCCAGTGCCCTCAGCTCCATGGTGATGTCCACnTAGCCGTGCTCCGCGCTGTAGTACATGGCCTCCTGGAGGGCCTTGGTGCGCGTCCGGCTCAGGCGCATGGGCCCCTCGCTGCCGCTGCCCTGGCTGGATGCATCGCTCTCTTCCACGCCCTCAGCCAGGATCTCCTCCAGGGACAGCACATCTGCTTTGGCCTGCTGTGGCTGAGTCAGGAGCTTCCTCAGGACGTTCCT + etc. + + </help> +</tool>
#!/usr/bin/env python
"""extract_primers.py: write the stored primer entry for each selected SNP.

For every SNP in ``--input`` the (scaffold, position) pair is looked up in
the primer data file registered for ``--species`` in the ``--primers_loc``
file, and whatever ``PrimersFile.get_entry`` returns is appended to
``--output``.
"""

import os
import sys
import traceback
from optparse import OptionParser
import genome_diversity as gd

# Options that must be present (and truthy), in the order they are checked.
REQUIRED_OPTIONS = (
    'input',
    'output',
    'primers_loc',
    'scaffold_col',
    'pos_col',
    'species',
)


def main_function(parse_arguments=None):
    """Build a decorator that turns ``f(options, arguments)`` into a script main.

    The decorated function parses ``arguments`` (default ``sys.argv``) with
    ``parse_arguments``, calls the wrapped function and always finishes with
    ``sys.exit``: with the wrapped function's return value on success, or
    with 1 after printing the error and its traceback to stderr on failure.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: (None, arguments)

    def main_decorator(to_decorate):
        def decorated_main(arguments=None):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments(arguments)
            rc = 1
            try:
                rc = to_decorate(options, arguments)
            except Exception as err:
                sys.stderr.write('ERROR: %s\n' % str(err))
                # Needs the module-level ``import traceback``; without it
                # this line raised NameError and no traceback was shown.
                traceback.print_exc()
            finally:
                sys.exit(rc)
        return decorated_main
    return main_decorator


def parse_arguments(arguments):
    """Parse ``arguments[1:]`` and return optparse's ``(options, args)`` pair."""
    parser = OptionParser()
    parser.add_option('--input',
                      type='string', dest='input',
                      help='file of selected SNPs')
    parser.add_option('--output',
                      type='string', dest='output',
                      help='output file')
    parser.add_option('--primers_loc',
                      type='string', dest='primers_loc',
                      help='primers .loc file')
    parser.add_option('--scaffold_col',
                      type="int", dest='scaffold_col',
                      help='scaffold column in the input file')
    parser.add_option('--pos_col',
                      type="int", dest='pos_col',
                      help='position column in the input file')
    parser.add_option('--species',
                      type="string", dest='species',
                      help='species')
    return parser.parse_args(arguments[1:])


@main_function(parse_arguments)
def main(options, arguments):
    """Look up and write the primer entry of every SNP in the input file.

    Raises RuntimeError naming the first required option that is missing.
    """
    for name in REQUIRED_OPTIONS:
        if not getattr(options, name):
            raise RuntimeError('missing --%s option' % name)

    snps = gd.SnpFile(filename=options.input,
                      seq_col=int(options.scaffold_col),
                      pos_col=int(options.pos_col))

    out_fh = gd._openfile(options.output, 'w')
    try:
        primer_data_file = gd.get_filename_from_loc(options.species, options.primers_loc)

        # The index is expected next to the data file, with a .cdb extension.
        file_root, file_ext = os.path.splitext(primer_data_file)
        primer_index_file = file_root + ".cdb"
        primers = gd.PrimersFile(data_file=primer_data_file,
                                 index_file=primer_index_file)

        while snps.next():
            seq, pos = snps.get_seq_pos()
            primer = primers.get_entry(seq, pos)
            if primer:
                out_fh.write(primer)
    finally:
        # Close (and flush) the output even when a lookup fails part-way.
        out_fh.close()


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_primers.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,90 @@ +<tool id="gd_extract_primers" name="Pick Primers" version="1.0.0"> + <description>: Find suitable PCR primers for SNPs</description> + + <command interpreter="python"> + extract_primers.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="txt" name="output"/> + </outputs> + + <tests> + <test> + <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" /> + <param name="choice" value="0"/> + <output name="output" file="test_out/extract_primers/extract_primers.txt" /> + </test> + </tests> + + + <help> + +**What it does** + + This tool extracts primers for SNPs in the dataset using the Primer3 program. 
+ The first line of output for a given SNP reports the name of the assembled + contig, the SNP's position in the contig, the two variant nucleotides, and + Primer3's "pair penalty". The next line, if not blank, names restriction + enzymes (from the user-adjustable list) that differentially cut at that + site, but do not cut at any other position between and including the + primer positions. The next lines show the SNP's flanking regions, with + the SNP position indicated by "n", including the primer positions and an + additional 3 nucleotides. + +----- + +**Example** + +- input file:: + + chr5_30800874_30802049 734 G A chr5 30801606 A 24 0 99 4 11 97 Y 496 0.502 0.033 0.215 6 + chr8_55117827_55119487 994 A G chr8 55118815 G 25 0 102 4 11 96 Y 22 0.502 0.025 2.365 1 + chr9_100484836_100485311 355 C T chr9 100485200 T 27 0 108 6 17 100 Y 190 0.512 0.880 2.733 4 + chr12_3635530_3637738 2101 T C chr12 3637630 T 25 0 102 4 13 93 Y 169 0.554 0.024 0.366 4 + +- output file:: + + chr5_30800874_30802049 734 G A 0.352964 + BglII,MboI,Sau3AI,Tru9I,XhoII + 1 CTGAAGGTGAGCAGGATTCAGGAGACAGAAAACAAAGCCCAGGCCTGCCCAAGGTGGAAA + >>>>>>>>>>>>>>>>>>>> + + 61 AGTCTAACAACTCGCCCTCTGCTTAnATCTGAGACTCACAGGGATAATAACACACTTGGT + + + 21 CAAGGAATAAACTAGATATTATTCACTCCTCTAGAAGGCTGCCAGGAAAATTGCCTGACT + <<<<<<< + + 181 TGAACCTTGGCTCTGA + <<<<<<<<<<<<< + etc. + + </help> +</tool>
--- a/filter_gd_snp.xml Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ -<tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.0.0"> - <description>: Discard some SNPs based on coverage or quality</description> - - <command interpreter="python"> - modify_snp_table.py "$input" "$p1_input" "$output" "$lo_coverage" "$hi_coverage" "$low_ind_cov" "$lo_quality" - #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) - #set $arg = '%s:%s' % ($individual_col, $individual) - "$arg" - #end for - </command> - - <inputs> - <param name="input" type="data" format="gd_snp" label="SNP dataset" /> - <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> - <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" /> - <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" /> - <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" /> - <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" /> - </inputs> - - <outputs> - <data name="output" format="gd_snp" metadata_source="input" /> - </outputs> - - <tests> - <test> - <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> - <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" /> - <param name="choice" value="1" /> - <param name="lo_coverage" value="0" /> - <param name="hi_coverage" value="1000" /> - <param name="low_ind_cov" value="3" /> - <param name="lo_quality" value="30" /> - <output name="output" file="test_out/modify_snp_table/modify.gd_snp" /> - </test> - </tests> - - <help> - -**Dataset formats** - -The input datasets are in gd_snp_ and gd_indivs_ formats. -The output dataset is in gd_snp_ format. (`Dataset missing?`_) - -.. 
_gd_snp: ./static/formatHelp.html#gd_snp -.. _gd_indivs: ./static/formatHelp.html#gd_indivs -.. _Dataset missing?: ./static/formatHelp.html - ------ - -**What it does** - -The user specifies that some of the individuals in a gd_snp dataset form a -"population", by supplying a list that has been previously created using the -Specify Individuals tool. SNPs are then discarded if their total coverage -for the population is too low or too high, or if their coverage or quality -score for any individual in the population is too low. - ------ - -**Example** - -- input gd_snp:: - - Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 - Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 - Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 - etc. - -- input individuals:: - - 9 PB1 - 13 PB2 - 17 PB3 - -- output when the lower bound on individual coverage is "3":: - - Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 - Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 - etc. - - </help> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_intervals.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +import errno +import os +import subprocess +import sys + +################################################################################ + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError, e: + if e.errno <> errno.EEXIST: + raise + +def run_program(prog, args, stdout_file=None): + #print "args:", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + + if stdout_file is not None: + with open(stdout_file, 'w') as ofh: + print >> ofh, stdoutdata.rstrip('\r\n') + + if rc != 0: + print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + +################################################################################ + +if len(sys.argv) != 11: + print "usage" + sys.exit(1) + +input, dbkey, output, output_files_path, chrom_col, pos_col, score_col, shuffles, cutoff, report_snps = sys.argv[1:11] + +prog = 'sweep' + +args = [ prog ] +args.append(input) +args.append(chrom_col) +args.append(pos_col) +args.append(score_col) +args.append(cutoff) +args.append(shuffles) +args.append(report_snps) + +run_program(None, args, stdout_file=output) + +if report_snps == "0": + sys.exit(0) + +################################################################################ + +mkdir_p(output_files_path) + +bedgraph_filename = 'bedgraph.txt' +links_filename = os.path.join(output_files_path, 'links.txt') + +data = [] +links_data = [] + +with open(output) as fh: + chrom = None + for line in fh: + line = line.rstrip('\r\n') + if not line: + continue + if line[0] != ' ': + # chrom line, add a link + chrom, interval_begin, interval_end, interval_value = line.split('\t') + links_data.append((chrom, int(interval_begin), 
int(interval_end))) + else: + # data line, add a bedgraph line + begin, value = line.split() + data.append((chrom, int(begin), value)) + +with open(bedgraph_filename, 'w') as ofh: + print >> ofh, 'track type=bedGraph' + for chrom, begin, value in sorted(data): + print >> ofh, chrom, begin, begin+1, value + +with open(links_filename, 'w') as ofh: + for chrom, begin, end in sorted(links_data): + print >> ofh, chrom, begin, end + +################################################################################ + +chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey) + +prog = 'fetchChromSizes' + +args = [ prog ] +args.append(dbkey) + +run_program(None, args, stdout_file=chrom_sizes_filename) + +################################################################################ + +prog = 'bedGraphToBigWig' + +args = [ prog ] +args.append(bedgraph_filename) +args.append(chrom_sizes_filename) +args.append(output) + +run_program(None, args) + +################################################################################ + +sys.exit(0) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_intervals.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,142 @@ +<tool id="gd_find_intervals" name="Remarkable Intervals" version="1.0.0"> + <description>: Find high-scoring runs of SNPs</description> + + <command interpreter="python"> + find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path" + + #if $override_metadata.choice == "0" + "$input.metadata.ref" "$input.metadata.rPos" + #else + "$override_metadata.ref_col" "$override_metadata.rpos_col" + #end if + + "$score_col" "$shuffles" + + #if $cutoff.type == 'percentage' + "$cutoff.cutoff_pct" + #else + "=$cutoff.cutoff_val" + #end if + + "$out_format" + </command> + + <inputs> + <param name="input" type="data" format="tabular" label="Input"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + + <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/> + + <conditional name="cutoff"> + <param name="type" type="select" label="Cutoff type"> + <option value="percentage">percentage</option> + <option value="value">value</option> + </param> + <when value="percentage"> + <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/> + </when> + <when value="value"> + <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/> + </when> + </conditional> + + <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/> + + <param name="out_format" type="select" format="integer" label="Report individual positions"> + <option value="0" selected="true">No</option> + <option value="1">Yes</option> + </param> + + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you need to choose the columns if the input dataset is not gd_snp"> + <option value="0" 
selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure the build in the metadata is the same as using here."/> + <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero or one based positions will work"/> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="interval"> + <change_format> + <when input="out_format" value="1" format="bigwigpos" /> + </change_format> + </data> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="score_col" value="5" /> + <param name="type" value="value" /> + <param name="cutoff_val" value="700.0" /> + <param name="shuffles" value="10" /> + <param name="out_format" value="0" /> + <param name="choice" value="0" /> + + <output name="output" file="test_out/find_intervals/find_intervals.interval" /> + </test> + </tests> + + <help> + +**Dataset formats** + +The input dataset is tabular_, with required columns of chromosome, position, +and score (in any column). +The output dataset is interval_. (`Dataset missing?`_) + +.. _interval: ./static/formatHelp.html#interval +.. _tabular: ./static/formatHelp.html#tab +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user selects a tabular dataset (such as a gd_snp dataset) and +if the dataset is not also gd_snp format, specifies +the columns containing chromosome, position, and scores (such as an Fst-value for the SNP). +For gd_snp format the metadata can be used to specify the chromosome and +position. +Other inputs include +a percentage or raw score for the "cutoff" which should be greater than the +average value for the scores column. 
A higher value will give smaller intervals +in the output. +If a percentage (e.g. 95%) is specified +then that percentile of the scores is used as the cutoff; +percentile may not work well if many rows or SNPs have the same score +(in that case use a raw score). The program subtracts the +cutoff from every score, then finds genomic intervals (i.e., consecutive runs +of SNPs) whose total score cannot be increased by adding or subtracting one +or more adjusted scores at the ends of the interval. +Another input is the number of times the +data should be randomized (only intervals with score exceeding the maximum for +the randomized data are reported). +If 100 shuffles are requested, then any interval reported by the tool has a +score with probability less than 0.01 of being equaled or exceeded by chance. + +----- + +**Example** + +- input (gd_snp):: + + Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 + Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 + ... + Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 + Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 + etc. + +- output not reporting individual positions:: + + chr2 9817960 67331624 1272.2000 + + </help> +</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gd_composite.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,128 @@
#!/usr/bin/env python
"""Build the HTML info page of a Genome Diversity composite dataset.

An InfoPage collects input and output Parameter objects, each carrying a
Display strategy that turns it into an HTML fragment, and renders them
through the Cheetah template gd_composite_template.html located next to
this module.
"""

from galaxy import eggs
import pkg_resources
pkg_resources.require( "Cheetah" )
from Cheetah.Template import Template

import errno
import os
import sys
from datetime import datetime

################################################################################

def die(message):
    """Write *message* to stderr and exit with status 1."""
    sys.stderr.write('{0}\n'.format(message))
    sys.exit(1)

def mkdir_p(path):
    """Create *path* and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

################################################################################

class Display(object):
    """Default display strategy: echo the parameter to stdout.

    Unlike the subclasses this returns None rather than an HTML fragment.
    """
    def display(self, parameter):
        sys.stdout.write('{0}\n'.format(parameter))

class DisplayFile(Display):
    """Render a parameter as a hyperlink: value is the href, name the text."""
    def display(self, parameter):
        return '<a href="{0}">{1}</a>'.format(parameter.value, parameter.name)

class DisplayValue(Display):
    """Render "description: value", or just the description if value is None."""
    def display(self, parameter):
        if parameter.value is not None:
            return '{0}: {1}'.format(parameter.description, parameter.value)
        else:
            return '{0}'.format(parameter.description)

class DisplayTagList(Display):
    """Render a list of "column:individual_name" tags as an ordered list.

    Only the individual names are shown; the parameter name, if any, is
    emitted as a heading line before the list.
    """
    def display(self, parameter):
        rv = []
        if parameter.name:
            rv.append(parameter.name)
        rv.append('<ol>')
        for tag in parameter.value:
            # Split on the first ':' only, so a name that itself contains
            # ':' does not break the unpacking.
            col, individual_name = tag.split(':', 1)
            rv.append('<li>{0}</li>'.format(individual_name))
        rv.append('</ol>')
        return '\n'.join(rv)

class DisplayPopulationList(Display):
    """Render populations as a bullet list with a numbered list of members.

    parameter.value is iterated; each item must provide ``name`` and
    ``individual_names()``.
    """
    # NOTE(review): names are inserted into the HTML unescaped; confirm they
    # can never contain markup characters.
    def display(self, parameter):
        rv = []
        rv.append('Populations')
        rv.append('<ul>')
        for population in parameter.value:
            rv.append('<li>')
            if population.name is not None:
                rv.append(population.name)
            rv.append('<ol>')
            for name in population.individual_names():
                rv.append('<li>{0}</li>'.format(name))
            rv.append('</ol>')
            rv.append('</li>')
        rv.append('</ul>')
        return '\n'.join(rv)


class Parameter(object):
    """A named value plus the Display strategy used to show it."""
    def __init__(self, name=None, value=None, description=None, display_type=None):
        self.name = name
        self.value = value
        self.description = description
        # Fall back to the base Display when no strategy is supplied.
        if display_type is None:
            self.display_type = Display()
        else:
            self.display_type = display_type

    def display(self):
        """Delegate to the display strategy and return its result."""
        return self.display_type.display(self)

class InfoPage(object):
    """Collects parameters and renders them with the composite template."""
    # The template is looked up next to the real (symlink-resolved) location
    # of this module.
    _realpath = os.path.realpath(__file__)
    _script_dir = os.path.dirname(_realpath)
    template_file = os.path.join(_script_dir, 'gd_composite_template.html')

    def __init__(self):
        self.timestamp = datetime.now().strftime('%Y-%m-%d %I:%M:%S %p')
        self.title = 'Genome Diversity Composite Dataset'
        self.inputs = []
        self.outputs = []
        self.misc = ''
        self.template = self.load_template()

    def load_template(self):
        """Return the template text with trailing newlines removed."""
        with open(self.template_file) as f:
            return f.read().rstrip('\r\n')

    def set_title(self, title):
        self.title = title

    def add_input_parameter(self, parameter):
        self.inputs.append(parameter)

    def add_output_parameter(self, parameter):
        self.outputs.append(parameter)

    def add_misc(self, misc):
        # Replaces, rather than appends to, any previous misc value.
        self.misc = misc

    def render(self):
        """Return a Cheetah Template bound to this page as ``$tool``.

        The Template object itself is returned, not a string; presumably
        callers convert it with str() -- confirm against the callers.
        """
        return Template(self.template, searchList=[{'tool': self}])
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gd_composite_template.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,40 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>${tool.title}</title> + </head> + <body> + <div class="document"> + Output completed: $tool.timestamp + <p/> + #if $tool.outputs + <div id="gd_outputs"> + Outputs + <ul> + #for output in $tool.outputs + <li>${output.display()}</li> + #end for + </ul> + </div> + #end if + #if $tool.inputs + <div id="gd_inputs"> + Inputs + <ul> + #for input in $tool.inputs + <li>${input.display()}</li> + #end for + </ul> + </div> + #end if + #if $tool.misc + <div id="gd_misc"> + $tool.misc.display() + </div> + #end if + </div> + </body> +</html>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,266 @@
#!/usr/bin/env python
"""Shared readers for the Genome Diversity tools.

Provides readers for tab-separated SNP tables, cdb-indexed primer and
SNP-call files, key/value location files and chromosome-length files.
"""

import sys
import cdblib

def _openfile( filename=None, mode='r' ):
    """Open *filename*; an IOError is re-raised as RuntimeError."""
    try:
        fh = open( filename, mode )
    except IOError as err:
        raise RuntimeError( "can't open file: %s\n" % str( err ) )
    return fh

def get_filename_from_loc( species=None, filename=None ):
    """Return the second column of the first line whose first column is *species*.

    The location file is tab-separated; lines starting with '#' are ignored.
    Raises RuntimeError if the file cannot be opened or has no such entry.
    """
    fh = _openfile( filename )
    try:
        for line in fh:
            if line.startswith( '#' ):
                continue
            elems = line.rstrip( '\r\n' ).split( '\t' )
            if len( elems ) >= 2 and elems[0] == species:
                return elems[1]
    finally:
        # The handle used to be left open on both the return and raise paths.
        fh.close()

    raise RuntimeError( "can't find '%s' in location file: %s\n" % ( species, filename ) )


class SnpFile( object ):
    """Sequential reader for a tab-separated SNP table.

    Column arguments are 1-based.  Comment lines (starting with '#') are
    collected in ``comments``; the fields of the current data line are in
    ``elems``.
    """
    def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ):
        self.filename = filename
        self.fh = _openfile( filename )
        self.seq_col = seq_col
        self.pos_col = pos_col
        self.ref_seq_col = ref_seq_col
        self.ref_pos_col = ref_pos_col
        self.elems = None
        self.line = None
        self.comments = []

    def next( self ):
        """Advance to the next data line.

        Returns 1 when a data line was read, None at end of file (``line``
        and ``elems`` are then reset to None).  Blank lines are skipped and
        comment lines are appended to ``comments``.
        """
        while self.fh:
            try:
                self.line = next( self.fh )
            except StopIteration:
                self.line = None
                self.elems = None
                return None
            if self.line:
                self.line = self.line.rstrip( '\r\n' )
                if self.line:
                    if self.line.startswith( '#' ):
                        self.comments.append( self.line )
                    else:
                        self.elems = self.line.split( '\t' )
                        return 1

    def get_seq_pos( self ):
        """Return (sequence, position) of the current line, or (None, None)."""
        if self.elems:
            return self.elems[ self.seq_col - 1 ], self.elems[ self.pos_col - 1 ]
        else:
            return None, None

    def get_ref_seq_pos( self ):
        """Return (reference sequence, reference position), or (None, None)."""
        if self.elems:
            # Was self.ref_seq_seq, an attribute that is never set.
            return self.elems[ self.ref_seq_col - 1 ], self.elems[ self.ref_pos_col - 1 ]
        else:
            return None, None


class IndexedFile( object ):
    """A data file accompanied by a cdb index mapping keys to byte offsets."""

    def __init__( self, data_file=None, index_file=None ):
        self.data_file = data_file
        self.index_file = index_file
        self.data_fh = _openfile( data_file )
        # NOTE(review): the index is read in text mode and the builtin hash()
        # is handed to cdblib.Reader; presumably this matches how the index
        # files were built -- confirm before changing either.
        self.index_fh = _openfile( index_file )
        self._reader = cdblib.Reader( self.index_fh.read(), hash )

    def get_indexed_line( self, key=None ):
        """Return the data line stored at the offset indexed under *key*.

        Returns None when the key is not in the index.  Raises RuntimeError
        when the offset points past the end of the data file.  The data file
        is left positioned just after the returned line.
        """
        line = None
        if key in self._reader:
            offset = self._reader.getint( key )
            self.data_fh.seek( offset )
            try:
                line = next( self.data_fh )
            except StopIteration:
                raise RuntimeError( 'index file out of sync for %s' % key )
        return line

class PrimersFile( IndexedFile ):
    """Indexed primers file; entries start with a "> sequence position" header."""

    def get_primer_header( self, sequence=None, position=None ):
        """Return the header line for (sequence, position), or None if absent.

        Raises RuntimeError when the indexed line is not a header, is too
        short, or names a different sequence/position.
        """
        key = "%s %s" % ( str( sequence ), str( position ) )
        header = self.get_indexed_line( key )
        if header:
            if header.startswith( '>' ):
                elems = header.split()
                if len( elems ) < 3:
                    raise RuntimeError( 'short primers header for %s' % key )
                if sequence != elems[1] or str( position ) != elems[2]:
                    raise RuntimeError( 'primers index for %s finds %s %s' % ( key, elems[1], elems[2] ) )
            else:
                raise RuntimeError( 'primers index out of sync for %s' % key )
        return header

    def get_entry( self, sequence=None, position=None ):
        """Return header plus all following lines up to the next header or EOF."""
        entry = self.get_primer_header( sequence, position )
        if entry:
            while self.data_fh:
                try:
                    line = next( self.data_fh )
                except StopIteration:
                    break
                if line.startswith( '>' ):
                    break
                entry += line
        return entry

    def get_enzymes( self, sequence=None, position=None ):
        """Return the enzyme names listed on the line after the header.

        The line is comma-separated; empty items are dropped.  Returns an
        empty list when the entry is absent.  Raises RuntimeError when the
        entry has no line after its header.
        """
        entry = self.get_primer_header( sequence, position )
        enzyme_list = []
        if entry:
            try:
                line = next( self.data_fh )
            except StopIteration:
                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
            if line.startswith( '>' ):
                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
            # The rstrip result used to be thrown away.
            line = line.rstrip( '\r\n' )
            if line:
                enzymes = line.split( ',' )
                for enzyme in enzymes:
                    enzyme = enzyme.strip()
                    if enzyme:
                        enzyme_list.append( enzyme )
        return enzyme_list

class SnpcallsFile( IndexedFile ):
    """Indexed SNP-calls file: tab-separated sequence, position, bracketed DNA."""

    def get_snp_seq( self, sequence=None, position=None ):
        """Return the third field of the line for (sequence, position), or None.

        Raises RuntimeError when the line is short or belongs to another key.
        """
        key = "%s %s" % ( str( sequence ), str( position ) )
        line = self.get_indexed_line( key )
        if line:
            elems = line.split( '\t' )
            if len (elems) < 3:
                raise RuntimeError( 'short snpcalls line for %s' % key )
            if sequence != elems[0] or str( position ) != elems[1]:
                raise RuntimeError( 'snpcalls index for %s finds %s %s' % ( key, elems[0], elems[1] ) )
            return elems[2]
        else:
            return None

    def get_flanking_dna( self, sequence=None, position=None, format='fasta' ):
        """Return the flanking DNA with the bracketed SNP replaced by 'n'.

        *format* is 'fasta' or 'primer3' and selects the record layout.
        Returns None when the SNP is not in the file.  Raises RuntimeError
        for an unknown format or when the brackets are missing.
        """
        if format != 'fasta' and format != 'primer3':
            raise RuntimeError( 'invalid format for flanking dna: %s' % str( format ) )
        seq = self.get_snp_seq( sequence, position )
        if seq:
            p = seq.find('[')
            if p == -1:
                raise RuntimeError( 'snpcalls entry for %s %s missing left bracket: %s' % ( str( sequence ), str( position ), seq ) )
            q = seq.find(']', p + 1)
            if q == -1:
                raise RuntimeError( 'snpcalls entry for %s %s missing right bracket: %s' % ( str( sequence ), str( position ), seq ) )
            q += 1

            if format == 'fasta':
                flanking_seq = '> '
            else:
                flanking_seq = 'SEQUENCE_ID='

            # The two alleles are the 1st and 3rd characters inside the
            # brackets (presumably the form is "[A/G]" -- confirm).
            flanking_seq += "%s %s %s %s\n" % ( str( sequence ), str( position ), seq[p+1], seq[p+3] )

            if format == 'primer3':
                flanking_seq += 'SEQUENCE_TEMPLATE='

            flanking_seq += "%sn%s\n" % ( seq[0:p], seq[q:] )

            if format == 'primer3':
                # 11-base target starting 5 bases before the SNP.
                flanking_seq += "SEQUENCE_TARGET=%d,11\n=\n" % ( p - 5 )

            return flanking_seq
        else:
            return None



class LocationFile( object ):
    """Key/value map loaded from a two-column, tab-separated location file."""
    def __init__(self, filename):
        self.build_map(filename)

    def build_map(self, filename):
        """Load the file into ``self.map``; lines without a tab are ignored."""
        self.map = {}
        self.open_file(filename)
        for line in self.read_lines():
            elems = line.split('\t', 1)
            if len(elems) == 2:
                self.map[ elems[0].strip() ] = elems[1].strip()
        self.close_file()

    def read_lines(self):
        """Yield the non-comment lines with line endings removed."""
        for line in self.fh:
            if not line.startswith('#'):
                line = line.rstrip('\r\n')
                yield line

    def open_file(self, filename):
        """Open the file; on failure report on stderr and exit with status 1."""
        self.filename = filename
        try:
            self.fh = open(filename, 'r')
        except IOError as err:
            sys.stderr.write("Error opening location file '%s': %s\n" % (filename, str(err)))
            sys.exit(1)

    def close_file(self):
        self.fh.close()

    def loc_file( self, key ):
        """Return the value for *key*; report and exit(1) if it is missing."""
        if key in self.map:
            return self.map[key]
        else:
            sys.stderr.write("'%s' does not appear in location file '%s'\n" % (key, self.filename))
            sys.exit(1)

class ChrLens( object ):
    """Chromosome name -> length map loaded from a tab-separated file."""
    def __init__( self, chrlen_filename ):
        self.chrlen_filename = chrlen_filename
        self.build_map()

    def build_map(self):
        """Load the file into ``self.map``.

        Lines without a tab are ignored.  A line whose length is not an
        integer is reported on stderr and skipped.
        """
        self.map = {}
        self.open_file(self.chrlen_filename)
        for line in self.read_lines():
            elems = line.split('\t', 1)
            if len(elems) == 2:
                chrom = elems[0].strip()
                chrom_len_text = elems[1].strip()
                try:
                    chrom_len = int( chrom_len_text )
                except ValueError:
                    sys.stderr.write("Bad length '%s' for chromosome '%s' in '%s'\n" % (chrom_len_text, chrom, self.chrlen_filename))
                    # Previously fell through and stored an undefined (or the
                    # previous chromosome's) length.
                    continue
                self.map[ chrom ] = chrom_len
        self.close_file()

    def read_lines(self):
        """Yield the non-comment lines with line endings removed."""
        for line in self.fh:
            if not line.startswith('#'):
                line = line.rstrip('\r\n')
                yield line

    def open_file(self, filename):
        """Open the file; on failure report on stderr and exit with status 1."""
        self.filename = filename
        try:
            self.fh = open(filename, 'r')
        except IOError as err:
            sys.stderr.write("Error opening chromosome length file '%s': %s\n" % (filename, str(err)))
            sys.exit(1)

    def close_file(self):
        self.fh.close()

    def length( self, key ):
        """Return the length of chromosome *key*, or None if unknown."""
        if key in self.map:
            return self.map[key]
        else:
            return None

    def __iter__( self ):
        """Iterate over the chromosome names."""
        for chrom in self.map:
            yield chrom
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/Makefile Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,8 @@
# Top-level Makefile: every target is delegated to the Makefile in src/.

# None of these targets creates a file of the same name, so mark them phony;
# otherwise a stray file called "all", "clean" or "install" would stop the
# target from running.
.PHONY: all clean install

# $(MAKE) instead of a literal "make" lets command-line flags and variable
# overrides (-j, -n, -k, VAR=value) reach the sub-make.
all:
	cd src && $(MAKE)

clean:
	cd src && $(MAKE) clean

install:
	cd src && $(MAKE) install
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/bin/gd_ploteig Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,172 @@
#!/usr/bin/env perl

### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k] [-y] [-z sep]
#
# Writes a gnuplot control file (and one data file per population) that
# plots two columns of an eigenvector file against each other, one series
# per population.  With -x, gnuplot and ps2pdf are run on the result.
use Getopt::Std ;
use File::Basename ;
use warnings ;

## pops : separated -x = make postscript and pdf -z use another separator
## -k keep intermediate files
## NEW if pops is a file names are read one per line

getopts('i:o:p:c:s:d:z:t:xky',\%opts) ;
$postscmode = $opts{"x"} ;
$oldkeystyle = $opts{"y"} ;
$kflag = $opts{"k"} ;
# Intermediate files are kept when -k is given, and always when no
# postscript is produced; they are deleted only with -x and without -k.
$keepflag = 1 if ($kflag) ;
$keepflag = 1 unless ($postscmode) ;

# Separator for the -p population list.  It is later used as a split
# pattern (see setpops), hence the escaping of "+".
$zsep = ":" ;
if (defined $opts{"z"}) {
    $zsep = $opts{"z"} ;
    $zsep = "\+" if ($zsep eq "+") ;
}

$title = "" ;
if (defined $opts{"t"}) {
    $title = $opts{"t"} ;
}
if (defined $opts{"i"}) {
    $infile = $opts{"i"} ;
}
else {
    usage() ;
    exit 0 ;
}
# Read the whole eigenvector file into @L.
open (FF, $infile) || die "can't open $infile\n" ;
@L = (<FF>) ;
chomp @L ;
# First pass: find the largest number of whitespace-separated fields.
# NOTE: the pattern requires at least one whitespace character before "#",
# so a "#" in column 0 is not treated as a comment here.
$nf = 0 ;
foreach $line (@L) {
    next if ($line =~ /^\s+#/) ;
    @Z = split " ", $line ;
    $x = @Z ;
    $nf = $x if ($nf < $x) ;
}
printf "## number of fields: %d\n", $nf ;
# 0-based index of the last field, which holds the population label.
$popcol = $nf-1 ;


if (defined $opts{"p"}) {
    $pops = $opts{"p"} ;
}
else {
    die "p parameter compulsory\n" ;
}

# setpops fills the global @P with the population names as a side effect.
$popsname = setpops ($pops) ;
print "$popsname\n" ;

# Eigenvector numbers to plot, default 1 and 2.
$c1 = 1; $c2 =2 ;
if (defined $opts{"c"}) {
    $cols = $opts{"c"} ;
    ($c1, $c2) = split ":", $cols ;
    # NOTE(review): $cols is always defined at this point, so this check can
    # never fire; a malformed -c value is not detected.
    die "bad c param: $cols\n" unless (defined $cols) ;
}

# Prefix for the generated file names; -s overrides it.
$stem = "$infile.$c1:$c2" ;
if (defined $opts{"s"}) {
    $stem = $opts{"s"} ;
}
# gnuplot control file; -o overrides it.
$gnfile = "$stem.$popsname.xtxt" ;

if (defined $opts{"o"}) {
    $gnfile = $opts{"o"} ;
}

@T = () ; ## trash
open (GG, ">$gnfile") || die "can't open $gnfile\n" ;
# Without -x the "set terminal" line is written commented out ("## ").
print GG "## " unless ($postscmode) ;
print GG "set terminal postscript color\n" ;
# User-defined line styles, activated by "set style increment user".
print GG "set style line 2 lc rgbcolor \"#376600\"\n";
print GG "set style line 11 lc rgbcolor \"#376600\"\n";
print GG "set style line 20 lc rgbcolor \"#376600\"\n";
print GG "set style line 29 lc rgbcolor \"#376600\"\n";
print GG "set style line 6 lc rgbcolor \"#FFCC00\"\n";
print GG "set style line 15 lc rgbcolor \"#FFCC00\"\n";
print GG "set style line 24 lc rgbcolor \"#FFCC00\"\n";
print GG "set style increment user\n";
print GG "set title \"$title\" \n" ;
print GG "set key outside\n" unless ($oldkeystyle) ;
print GG "set xlabel \"eigenvector $c1\" \n" ;
print GG "set ylabel \"eigenvector $c2\" \n" ;
print GG "plot " ;
$np = @P ;
$lastpop = $P[$np-1] ;
# Data-file columns are shifted by one relative to the eigenvector number
# because the first column holds the individual id (see usage text).
$d1 = $c1+1 ;
$d2 = $c2+1 ;
# One plot clause and one data file per population.
foreach $pop (@P) {
    $dfile = "$stem:$pop" ;
    push @T, $dfile ;
    print GG " \"$dfile\" using $d1:$d2 title \"$pop\" " ;
    # Every clause except the last ends with a gnuplot line continuation.
    print GG ", \\\n" unless ($pop eq $lastpop) ;
    open (YY, ">$dfile") || die "can't open $dfile\n" ;
    # Copy the input lines whose last field equals this population.
    foreach $line (@L) {
        next if ($line =~ /^\s+#/) ;
        @Z = split " ", $line ;
        next unless (defined $Z[$popcol]) ;
        next unless ($Z[$popcol] eq $pop) ;
        print YY "$line\n" ;
    }
    close YY ;
}
print GG "\n" ;
# With -x the "pause" line is written commented out instead.
print GG "## " if ($postscmode) ;
print GG "pause 9999\n" ;
close GG ;

if ($postscmode) {
    $psfile = "$stem.ps" ;

    # If the control file name contains "xtxt", derive the postscript name
    # from it by replacing the first "xtxt" with "ps".
    if ($gnfile =~ /xtxt/) {
        $psfile = $gnfile ;
        $psfile =~ s/xtxt/ps/ ;
    }
    # NOTE(review): file names are interpolated into shell command lines
    # unquoted; confirm they can never contain shell metacharacters.
    system "gnuplot < $gnfile > $psfile" ;
    #system "fixgreen $psfile" ;
    system "ps2pdf $psfile " ;
}
# Remove the per-population data files unless they are to be kept.
unlink (@T) unless $keepflag ;

# Print the command-line help to stdout.
sub usage {

print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k]\n" ;
print "-i eigfile input file first col indiv-id last col population\n" ;
print "## as output by smartpca in outputvecs \n" ;
print "-c a:b a, b columns to plot. 1:2 would be common and leading 2 eigenvectors\n" ;
print "-p pops Populations to plot. : delimited. eg -p Bantu:San:French\n" ;
print "## pops can also be a filename. List populations 1 per line\n" ;
print "[-s stem] stem will start various output files\n" ;
print "[-o ofile] ofile will be gnuplot control file. Should have xtxt suffix\n";
print "[-x] make ps and pdf files\n" ;
print "[-k] keep various intermediate files although -x set\n" ;
print "## necessary if .xtxt file is to be hand edited\n" ;
print "[-y] put key at top right inside box (old mode)\n" ;
print "[-t] title (legend)\n" ;

print "The xtxt file is a gnuplot file and can be easily hand edited. Intermediate files
needed if you want to make your own plot\n" ;

}
# Fill the global @P with the populations to plot and return a name used in
# output file names.
#   - If $pops names an existing file: read the first word of every line
#     (skipping empty lines and words containing "#") and return the file's
#     base name.
#   - Otherwise: split $pops on the global separator $zsep and return $pops
#     itself.
sub setpops {
    my ($pops) = @_ ;
    local (@a, $d, $b, $e) ;

    if (-e $pops) {
        open (FF1, $pops) || die "can't open $pops\n" ;
        @P = () ;
        foreach $line (<FF1>) {
            ($a) = split " ", $line ;
            next unless (defined $a) ;
            next if ($a =~ /\#/) ;
            push @P, $a ;
        }
        $out = join ":", @P ;
        print "## pops: $out\n" ;
        ($b, $d , $e) = fileparse($pops) ;
        return $b ;
    }
    @P = split $zsep, $pops ;
    return $pops ;

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/Fst_ave.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,228 @@ +/* Fst_ave -- determine the average Fst values between two specified populations +* and between two random populations +* +* argv[1] = a Galaxy SNP table. For each of several individuals, the table +* has four columns (#A, #B, genotype, quality). +* argv[2] = 1 if Fst is estimated from SAMtools genotypes; 0 means use +* read-coverage data. +* argv[3] = lower bound, for individual quality value if argv[2] = 1 +* or for total number of reads per population if argv[2] = 0. +* SNPs not satisfying these lower bounds are ignored. +* argv[4] = 1 to discard SNPs that appear fixed in the two populations +* argv[5] = 1 for unbiased estimator, else 0 for the original Wright form. +* argv[6] = k >= 0 says report the average Fst and the largest average over k +* randomly chosen splits into two populations of those sizes +* argv[7], argv[8], ..., have the form "13:1", "13:2" or "13:0", meaning +* that the 13th and 14th columns (base 1) give the allele counts +* for an individual that is in population 1, in population 2, +* or in neither population. + +What it does on Galaxy + +The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to select individuals from a SNP table. No individual can be in both populations. Other choices are as follows. + +Data source. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele, or by adding the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on the amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. 
SNPs not meeting these lower bounds are ignored. + +The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded. + +The user chooses which definition of Fst to use: Wright's original definition or Weir's unbiased estimator. + +Finally, the user decides whether to use randomizations. If so, then the user specifies how many randomly generated population pairs (retaining the numbers of individuals of the originals) to generate, as well as the "population" of additional individuals (not in the first two populations) that can be used in the randomization process. + +The program prints the average Fst for the original populations and the number of SNPs used to compute it. If randomizations were requested, it prints the average Fst for each randomly generated population pair, ending with a summary that includes the maximum and average value, and the highest-scoring population pair. +*/ + +#include "lib.h" +#include "Fst_lib.h" + +// maximum length of a line from the table +#define MOST 5000 + +// information about the specified individuals +// x is an array of nI values 0, 1, or 2; +// shuffling x creates random "populations" +int col[MOST], x[MOST], best_x[MOST]; +int nI, lower_bound, unbiased, discard, genotypes, nsnp; + +// each SNP has an array of counts +struct count { + int A, B; +}; + +// linked list summarizes the Galaxy table +struct snp { + struct count *c; + struct snp *next; +} *start, *last; + +// given the two populations specified by x[], return the average Fst +double ave_Fst() { + double tot_Fst; + struct snp *s; + int i, A1, B1, A2, B2, too_few; + + + // scan the SNPs + tot_Fst = 0.0; + nsnp = 0; + for (s = start; s != NULL; s = s->next) { + // get counts for the two populations at this SNP + for (A1 = B1 = A2 = B2 = i = 0; i < nI; ++i) { + if (s->c[i].A < 0) // no genotypes + continue; + if (x[i] == 1) { + A1 += s->c[i].A; + B1 += s->c[i].B; + } else if (x[i] == 2) { + A2 += s->c[i].A; + B2 
+= s->c[i].B; + } + } + if (discard && ((A1 == 0 && A2 == 0) || (B1 == 0 && B2 == 0))) + continue; // fixed in these two populations + too_few = (genotypes ? 1 : lower_bound); + if (A1+B1 >= too_few && A2+B2 >= too_few) { + ++nsnp; + tot_Fst += Fst(A1, B1, A2, B2, unbiased); + } + } + return tot_Fst/nsnp; +} + +/* shuffle the values x[0], x[1], ... , x[nI-1]; +* Uses Algorithm P in page 125 of "The Art of Computer Programming (Vol II) +* Seminumerical Programming", by Donald Knuth, Addison-Wesley, 1971. +*/ +void shuffle() { + int i, j, temp; + + for (i = nI - 1; i > 0; --i) { + // swap what's in location i with location j, where 0 <= j <= i + j = random() % (i+1); + temp = x[i]; + x[i] = x[j]; + x[j] = temp; + } +} + +int main(int argc, char **argv) { + FILE *fp; + char *p, *z = "\t\n", buf[MOST]; + int X[MOST], nshuff, n, i, j, k, saw[3], larger, all = 1; + struct snp *new; + double F, F1, largest_F, tot_F; + + if (argc < 7) + fatal("args: table data-source lower_bound discard? unbiased? #shuffles n:1 m:2 ..."); + + // handle command-line arguments + genotypes = atoi(argv[2]); + lower_bound = atoi(argv[3]); + if (!genotypes && lower_bound <= 0) + fatal("minimum coverage should exceed 0"); + discard = atoi(argv[4]); + unbiased = atoi(argv[5]); + nshuff = atoi(argv[6]); + saw[0] = saw[1] = saw[2] = 0; + // populations 1 and 2 must be disjoint + // population 0 can be replaced by population 1 or 2 + for (i = 7; i < argc; ++i) { + if (sscanf(argv[i], "%d:%d", &j, &k) != 2) + fatalf("not like 13:2 : %s", argv[i]); + if (k < 0 || k > 2) + fatalf("not population 0, 1 or 2: %s", argv[i]); + saw[k] = 1; + // seen this individual (i.e., column) before?? 
+ for (n = 0; n < nI && col[n] != j; ++n) + ; + if (n < nI) { // OK if one of the populations is 0 + if (k > 0) { + if (x[n] > 0 && x[n] != k) + fatalf("column %d is in both populations", j); + x[n] = k; + } + } else { + col[nI] = j; + x[nI] = k; + ++nI; + } + } + if (saw[1] == 0) + fatal("population 1 is empty"); + if (saw[2] == 0) + fatal("population 2 is empty"); + + // read the table of SNPs and store the essential allele counts + fp = ckopen(argv[1], "r"); + while (fgets(buf, MOST, fp)) { + if (buf[0] == '#') + continue; + new = ckalloc(sizeof(*new)); + new->next = NULL; + new->c = ckalloc(nI*sizeof(struct count)); + // set X[i] = atoi(i-th word of buf), i is base 1 + for (i = 1, p = strtok(buf, z); p != NULL; + ++i, p = strtok(NULL, z)) + X[i] = atoi(p); + for (i = 0; i < nI; ++i) { + n = col[i]; + if (genotypes) { + k = X[n+2]; + if (k == -1 || X[n+3] < lower_bound) + new->c[i].A = new->c[i].B = -1; + else { + new->c[i].A = k; + new->c[i].B = 2 - k; + } + } else { + new->c[i].A = X[n]; + new->c[i].B = X[n+1]; + } + } + if (start == NULL) + start = new; + else + last->next = new; + last = new; + } + fclose(fp); + + F1 = ave_Fst(); + printf("average Fst is %5.5f, using %d SNPs\n", F1, nsnp); + for (j = 0; j < nI; ++j) + best_x[j] = x[j]; + for (tot_F = largest_F = 0.0, larger = i = 0; i < nshuff; ++i) { + shuffle(); + if ((F = ave_Fst()) > F1) + ++larger; + if (F > largest_F) { + largest_F = F; + for (j = 0; j < nI; ++j) + best_x[j] = x[j]; + } + tot_F += F; + if (all) // make this optional? 
+ printf("%d: %f\n", i+1, F); + } + if (nshuff > 0) { + printf("%d of %d random groupings had a larger average Fst\n", + larger, nshuff); + printf("largest = %5.5f, mean = %5.5f\n", largest_F, + tot_F/nshuff); + if (largest_F > F1) { + printf("first columns for the best two populations:\n"); + for (i = 0; i < nI; ++i) + if (best_x[i] == 1) + printf("%d ", col[i]); + printf("and\n"); + for (i = 0; i < nI; ++i) + if (best_x[i] == 2) + printf("%d ", col[i]); + putchar('\n'); + } + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/Fst_column.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,132 @@ +/* Fst_column -- add an Fst column to a Galaxy table +* +* argv{1] = a Galaxy SNP table. For each of several individuals, the table +* has four columns (#A, #B, genotype, quality). +* argv[2] = 1 if Fst is estimated from SAMtools genotypes; 0 means use +* read-coverage data. +* argv[3] = lower bound for total number of reads per population +* argv[4] = lower bound for individual quality value +* argv[5] = 1 to retain SNPs that fail to satisfy the lower bound and set +* Fst = -1; delete them if argv[4] = 0. +* argv[6] = 1 to discard SNPs that appear fixed in the two populations +* argv[7] = 1 for unbiased estimator, else 0 for the original Wright form. +* argv[8], argv[9], ..., have the form "13:1" or "13:2", meaning that +* the 13th, 14th, and 15th columns (base 1) give the allele counts +* and genotype for an individual that is in population 1 or +* population 2, respectively. + +What It Does on Galaxy + +The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to select individuals from a SNP table. No individual can be in both populations. Other choices are as follows. + +Data soure. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele, or by adding the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. + +The user specifies whether the SNPs that violate the lower bound should be ignored or the Fst set to -1. 
+ +The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded. + +Finally, the user chooses which definition of Fst to use: Wright's original definition or Weir's unbiased estimator. + +A column is appended to the SNP table giving the Fst for each retained SNP. + +*/ + +#include "lib.h" +#include "Fst_lib.h" + +// most characters allowed in a row of the table +#define MOST 5000 + +// column and population for the relevant individuals/groups +int col[MOST], pop[MOST]; +int nI; + +int main(int argc, char **argv) { + FILE *fp; + char *p, *z = "\t\n", buf[MOST], trash[MOST]; + int X[MOST], min_cov, min_qual, retain, discard, unbiased, genotypes, + n, i, g, A1, B1, A2, B2, saw[3], x1, y1, x2, y2; + double F; + + if (argc < 7) + fatal("args: table data-source lower-bound retain? discard? unbiased? n:1 m:2 ..."); + genotypes = atoi(argv[2]); + min_cov = atoi(argv[3]); + min_qual = atoi(argv[4]); + retain = atoi(argv[5]); + discard = atoi(argv[6]); + unbiased = atoi(argv[7]); + saw[1] = saw[2] = 0; + for (i = 8; i < argc; ++i, ++nI) { + if (sscanf(argv[i], "%d:%d", &(col[nI]), &(pop[nI])) != 2) + fatalf("not like 13:2 : %s", argv[i]); + if (pop[nI] < 1 || pop[nI] > 2) + fatalf("not population 1 or 2: %s", argv[i]); + saw[pop[nI]] = 1; + // seen this individual before? 
+ for (n = 0; n < nI && col[n] != col[nI]; ++n) + ; + if (n < nI) + fatalf("individual at column %d is mentioned twice", + col[n]); + } + if (saw[1] == 0) + fatal("population 1 is empty"); + if (saw[2] == 0) + fatal("population 2 is empty"); + + fp = ckopen(argv[1], "r"); + while (fgets(buf, MOST, fp)) { + if (buf[0] == '#') + continue; + strcpy(trash, buf); + // set X[i] = atoi(i-th word of s), i is base 0 + for (i = 1, p = strtok(trash, z); p != NULL; + ++i, p = strtok(NULL, z)) + X[i] = atoi(p); + for (i = A1 = B1 = A2 = B2 = x1 = y1 = x2 = y2 = 0; + i < nI; ++i) { + n = col[i]; + g = X[n+2]; // save genotype + if ((genotypes && g == -1) || X[n+3] < min_qual) + continue; + if (pop[i] == 1) { + // column n (base 1) corresponds to entry X[n] + x1 += X[n]; + y1 += X[n+1]; + if (genotypes) { + A1 += g; + B1 += (2 - g); + } else { + A1 += X[n]; + B1 += X[n+1]; + } + } else if (pop[i] == 2) { + x2 += X[n]; + y2 += X[n+1]; + if (genotypes) { + A2 += g; + B2 += (2 - g); + } else { + A2 += X[n]; + B2 += X[n+1]; + } + } + } + if (discard && ((A1 == 0 && A2 == 0) || (B1 == 0 && B2 == 0))) + continue; // not variable in the two populations + if (x1+y1 < min_cov || x2+y2 < min_cov) + F = -1.0; + else + F = Fst(A1, B1, A2, B2, unbiased); + if (F == -1.0 && !retain) + continue; + if ((p = strchr(buf, '\n')) != NULL) + *p = '\0'; + printf("%s\t%5.4f\n", buf, F); + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/Fst_lib.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,49 @@ +// procedure to compute either Wright's Fst or an unbiased estimator of if + +#include "lib.h" +// Wright's Fst +static double Wright(double f1, double f2) { + double + f, // frequency in the pooled population + H_ave, // average of HWE heterogosity in the two populations + H_all; // HWE heterozygosity in the pooled popuations + + H_ave = f1*(1.0 - f1) + f2*(1.0 - f2); + f = (f1 + f2)/2.0; + if (f == 0.0 || f == 1.0) + return 0.0; + H_all = 2.0*f*(1.0 - f); + return (H_all - H_ave) / H_all; +} + +/* unbiased estimator of Fst from: + Weir, B.S. and Cockerham, C.C. 1984. Estimating F-statistics for the + analysis of population structure. Evolution 38: 1358–1370. +as interpreted by: + Akey, J.M., Zhang, G., Zhang, K., Jin, L., and Shriver, M.D. 2002. + Interrogating a high-density SNP map for signatures of natural + selection. Genome Res. 12: 1805–1814. +*/ +static double Weir(int n1, double p1, int n2, double p2) { + double F, p_bar, nc, MSP, MSG, N = n1 + n2; + + if (p1 == p2) + return 0.0; + MSG = (n1*p1*(1.0-p1) + n2*p2*(1.0-p2))/(N-1.0); + p_bar = (n1*p1 + n2*p2)/N; + MSP = n1*(p1-p_bar)*(p1-p_bar) + n2*(p2-p_bar)*(p2-p_bar); + nc = N - (double)(n1*n1 + n2*n2)/N; + F = (MSP - MSG) / (MSP + (nc-1)*MSG); + if (F < 0.0) + F = 0.0; + return F; +} + +double Fst(int nA1, int na1, int nA2, int na2, int unbiased) { + double p1, p2; + + p1 = (double)nA1 / (double)(nA1+na1); + p2 = (double)nA2 / (double)(nA2+na2); + + return (unbiased ? Weir(nA1+na1, p1, nA2+na2, p2) : Wright(p1, p2)); +}
// genome_diversity/src/Fst_lib.h
/* return either Sewall Wright's Fst or its Weir unbiased estimator
*  parameters are as follows
*    1, 2 : counts of the two alleles in population 1
*    3, 4 : counts of the two alleles in population 2
*    5 : 0 = return Wright's formulation, 1 = return unbiased estimator
*/

#ifndef FST_LIB_H
#define FST_LIB_H

double Fst(int, int, int, int, int);

#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/Huang.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,44 @@ +// Find highest scoring intervals, as discussed in Huang.h. + +#include "lib.h" +#include "Huang.h" + +void Huang(double x[], int n) { + double Score, oldScore; + int v, L, i; + + top = 0; // don't use location 0, so as to follow Fig. 6 + for (Score = 0.0, v = 0; v < n; ++v) { + oldScore = Score; + Score += x[v]; + if (x[v] < 0) + continue; + if (top > 0 && R[top].Rpos == v-1) { + // add edge to top subpath + R[top].Rpos = v; + R[top].Rscore = Score; + } else { + // create a one-edge subpath + ++top; + if (top >= MAX_R) + fatal("In Haung(), top is too big"); + R[top].Lpos = v-1; + R[top].Lscore = oldScore; + R[top].Rpos = v; + R[top].Rscore = Score; + R[top].Lower = top-1; + while ((L = R[top].Lower) > 0 && + R[L].Lscore > R[top].Lscore) + R[top].Lower = R[L].Lower; + } + // merge subpaths + while (top > 1 && (L = R[top].Lower) > 0 && + R[L].Rscore <= R[top].Rscore) { + R[L].Rpos = R[top].Rpos; + R[L].Rscore = R[top].Rscore; + top = L; + } + } + for (i = 1; i <= top; ++i) + R[i].Score = R[i].Rscore - R[i].Lscore; +}
// genome_diversity/src/Huang.h
/* Find intervals of highest total score, i.e., such that adding positions to
*  either end will decrease the total. We use the method of Fig. 6 of the paper:
*  Xiaoqiu Huang, Pavel Pevzner, Webb Miller (1994) Parametric recomputing in
*  alignment graphs. Combinatorial Pattern Matching (Springer Lecture Notes in
*  Computer Science, 807), 87-101.
*
*  The input scores are in x[0], x[1], ..., x[n-1], but the output regions
*  are in R[1], R[2], ..., R[top]. R[i].Score is the total score of the i-th
*  (in order of position) positive-scoring interval of x, which consists of
*  x[R[i].Lpos + 1] to x[R[i].Rpos].
*/
#ifndef HUANG_H
#define HUANG_H

#define MAX_R 5000000

// NOTE(review): R and top are defined (not just declared) here, so every
// file that includes this header gets its own tentative definition; that
// links only when the compiler merges common symbols (e.g. gcc -fcommon).
// Making them "extern" here and defining them once in Huang.c would be
// safer -- confirm against the files that include this header.
struct region {    // a consecutive (relative to the reference) run of SNPs
    double Lscore, Rscore, Score;
    int Lpos, Rpos, Lower;
} R[MAX_R];
int top;

void Huang(double *x, int n);

#endif
# genome_diversity/src/Makefile
#
# Builds each tool from its own source plus the shared helpers. Headers are
# listed as prerequisites so that editing one triggers a rebuild; only the
# .c files are passed to the compiler.

CC = gcc
COPT = -O2
CWARN = -W -Wall
CFLAGS = $(COPT) $(CWARN)
INSTALL_DIR = ../bin

TARGETS = admix_prep coords2admix coverage dist_mat dpmix eval2pct \
	Fst_ave Fst_column pop sweep

# these names are commands, not files
.PHONY: all install clean

all: $(TARGETS)

install: $(TARGETS)
	mkdir -p "$(INSTALL_DIR)"
	cp $(TARGETS) "$(INSTALL_DIR)"

admix_prep: admix_prep.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

coords2admix: coords2admix.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

coverage: coverage.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

dist_mat: dist_mat.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

dpmix: dpmix.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

eval2pct: eval2pct.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

Fst_ave: Fst_ave.c Fst_lib.c lib.c lib.h Fst_lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

Fst_column: Fst_column.c Fst_lib.c lib.c lib.h Fst_lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

pop: pop.c lib.c lib.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

sweep: sweep.c lib.c Huang.c lib.h Huang.h
	$(CC) $(CFLAGS) $(filter %.c,$^) -o $@

clean:
	rm -f $(TARGETS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/admix_prep.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,119 @@ +/* admix_prep -- prepare the ".ped" and ".map" files (PLINK format) for input to +* the "admixture" program. +* +* argv[1] -- a Galaxy SNP table +* argv[2] -- required number of reads for each individual to use a SNP +* argv[3] -- required genotype quality for each individual to use a SNP +* argv[4] -- minimum spacing between SNPs on the same scaffold +* argv[k] for k > 4 have the form "13:fred", meaning that the 13th and 14th +* columns (base 0) give the allele counts for the individual or group named +* "fred". + +What it does on Galaxy +The tool converts a SNP table into two tables, called "admix.map" and "admix.ped", needed for estimating the population structure. The user can read or download those files, or simply pass this tool's output on to other programs. The user imposes conditions on which SNPs to consider, such as the minimum coverage and/or quality value for every individual, or the distance to the closest SNP in the same contig (as named in the first column of the SNP table). A useful piece of information produced by the tool is the number of SNPs meeting those conditions, which can be found by clicking on the "eye" after the program runs. 
+ +*/ + +#include "lib.h" + +// bounds line length for a line of the Galaxy table +#define MOST 5000 +struct individual { + int column; + char *name; +} I[MOST/8]; // each individual has 4 columns and 4 tab characters +int nI; // number of individuals +int X[MOST]; // integer values in a row of the SNP table + +// bounds the number of SNPs that can be kept +#define MAX_KEEP 10000000 +char *S[MAX_KEEP]; // S[i] is a row of 2*nI alleles +int nK; + +int main(int argc, char **argv) { + FILE *fp, *ped, *map; + char *p, *z = " \t\n", buf[MOST], trash[MOST], name[100], *s, + scaf[100], prev_scaf[100]; + int i, j, m, min_coverage, min_quality, min_space, nsnp, genotype, + pos, prev_pos; + + if (argc < 5) + fatal("args: Galaxy-table min-cov min-qual min-space 13:fred 16:mary ..."); + min_coverage = atoi(argv[2]); + min_quality = atoi(argv[3]); + min_space = atoi(argv[4]); + + for (i = 5; i < argc; ++i, ++nI) { + if (nI >= MOST/8) + fatal("Too many individuals"); + if (sscanf(argv[i], "%d:%s", &(I[nI].column), name) != 2) + fatalf("bad arg: %s", argv[i]); + I[nI].name = copy_string(name); + } + + map = ckopen("admix.map", "w"); + + fp = ckopen(argv[1], "r"); + prev_scaf[0] = '\0'; + prev_pos = 0; + for (nsnp = 0; fgets(buf, MOST, fp); ) { + if (buf[0] == '#') + continue; + ++nsnp; + if (sscanf(buf, "%s %d", scaf, &pos) != 2) + fatalf("choke: %s", buf); + if (same_string(scaf, prev_scaf)) { + if (pos < prev_pos + min_space) + continue; + } else { + strcpy(prev_scaf, scaf); + prev_pos = -min_space; + } + + // X[i] = atoi(i-th word base-1) + strcpy(trash, buf); + for (i = 1, p = strtok(trash, z); p != NULL; + ++i, p = strtok(NULL, z)) + X[i] = atoi(p); + for (i = 0; i < nI; ++i) { + m = I[i].column; + if (X[m] + X[m+1] < min_coverage || X[m+3] < min_quality) + break; + } + if (i < nI) + continue; + prev_pos = pos; + + if (nK >= MAX_KEEP) + fatal("Too many SNPs"); + fprintf(map, "1 snp%d 0 %d\n", nsnp, nsnp+1); + s = S[nK++] = ckalloc(2*nI*sizeof(char)); + for (i = j = 0; i < 
nI; ++i, j += 2) { + genotype = X[I[i].column+2]; + if (genotype == 2) + s[j] = s[j+1] = '1'; + else if (genotype == 0) + s[j] = s[j+1] = '2'; + else if (genotype == 1) { + s[j] = '1'; + s[j+1] = '2'; + } else // undefined genotype + s[j] = s[j+1] = '0'; + } + } + + fclose(map); + + ped = ckopen("admix.ped", "w"); + for (i = 0; i < nI; ++i) { + fprintf(ped, "%s 1 0 0 1 1", I[i].name); + for (j = 0; j < nK; ++j) + fprintf(ped, " %c %c", S[j][2*i], S[j][2*i+1]); + putc('\n', ped); + } + + printf("Using %d of %d SNPs\n", nK, nsnp); + fclose(ped); + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/coords2admix.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,91 @@ +// coords2admix -- add projections onto chords to information about +// coordinates in PCA plots + +#include "lib.h" + +#define MAX_POP 1000 +struct pop { + char *name; + float x, y; +} P[MAX_POP]; +int nP; + +int main(int argc, char **argv) { + FILE *fp; + char buf[500], x[100], y[100], z[100], cur_pop[100]; + int ncur, i, j, k; + float eig1, eig2, tot_x = 0.0, tot_y = 0.0, x1, y1, x2, y2, a, b, c, d; + + if (argc == 1) + fp = stdin; + else if (argc == 2) + fp = ckopen(argv[1], "r"); + else + fatal("optional arg: smartpca coordinates"); + + if (!fgets(buf, 500, fp)) + fatal("empty set of coordinates"); + if (sscanf(buf, "%s %s %s", x, y, z) != 3 || + !same_string(x, "#eigvals:")) + fatalf("cannot find eigenvalues: %s", buf); + printf("%s", buf); + eig1 = atof(y); + eig2 = atof(z); + //printf("eig1 = %f, eig2 = %f\n", eig1, eig2); + + strcpy(cur_pop, ""); + ncur = 0; + while (fgets(buf, 500, fp)) { + if (sscanf(buf, "%*s %s %s %s", x, y, z) != 3) + fatalf("gag: %s", buf); + printf("%s", buf); + if (!same_string(cur_pop, z)) { + if (ncur > 0) { + P[nP].name = copy_string(cur_pop); + P[nP].x = tot_x/ncur; + P[nP].y = tot_y/ncur; + ++nP; + } + ncur = 1; + strcpy(cur_pop, z); + tot_x = atof(x); + tot_y = atof(y); + } else { + ++ncur; + tot_x += atof(x); + tot_y += atof(y); + } + } + P[nP].name = copy_string(cur_pop); + P[nP].x = tot_x/ncur; + P[nP].y = tot_y/ncur; + ++nP; + +/* +for (i = 0; i < nP; ++i) +printf("%s %f %f\n", P[i].name, P[i].x, P[i].y); +*/ + + // loop over pairs of populations + for (i = 0; i < nP; ++i) { + x1 = eig1*P[i].x; + y1 = eig2*P[i].y; + for (j = i+1; j < nP; ++j) { + printf("\nprojection along chord %s -> %s\n", + P[i].name, P[j].name); + x2 = eig1*P[j].x; + y2 = eig2*P[j].y; + c = (x1-x2)*(x1-x2) + (y1-y2)*(y1-y2); + for (k = 0; k < nP; ++k) + if (k != i && k != j) { + a = eig1*P[k].x; + b = eig2*P[k].y; + d 
= (x2-x1)*(a-x1) + (y2-y1)*(b-y1); + printf(" %s: %f\n", P[k].name, d/c); + } + } + } + + return 0; +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/coverage.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,155 @@ +/* coverage -- report distributions of SNP coverage or quality for individuals, +* or coverage for populations +* +* argv{1] -- a Galaxy SNP table. For each individuals, the table has four +* columns (count of each allele, genotype, quality). +* argv[2] -- 0 = sequence coverage, 1 = genotype quality +* argv[3] -- file name for the text version of output (input for producing +* the graphical summary goes to stdout) +* argv[4], argv[5], ..., have the form "13:fred", meaning that the 13th +* 14th, and 16th columns (base 1) give the two allele counts +* and the quality for "fred", where "fred" can be the name of +* a population with several individuals (all named "fred") +What it does on Galaxy +The tool reports distributions of SNP reliability indicators for individuals or populations. The reliability can be measured by either the sequence coverage or the SAMtools quality value, though the notion of a population-level quality is not supported. Textual and graphical reports are generated, where the text output gives the cumulative distributions. +*/ + +#include "lib.h" + +// maximum length of a line from the table +#define MOST 5000 + +// the largest coverage or quality value being considered +#define MAX_VAL 1000 + +FILE *gp; // for text output + +// a population is the set of all indivuals with the same name +// (perhaps just a single individual) +struct pop { + int cov, n[MAX_VAL+1]; + long long sum, tot; + char *name; +} P[MOST/4]; +int nP; // number of populations + +// maps column to population +struct individual { + int col, pop; +} I[MOST/4]; +int nI; + +/* Report the distribution for each individual. P[i].n[k] is the number of SNPs +* of value (coverage or quality) k in population i, for k < MAX_VAL; +* I[i].n[MAX_VAL] is the number of SNPs of value k >= MAX_VAL. 
+* We print the percentages, p, of SNPs with value <= k, ending when all +* populations have reached a p >= 98%. +*/ +void print_cov() { + int i, j, k, last_j; + long long sum; + + // find where to stop printing + for (last_j = i = 0; i < nP; ++i) { + for (sum = j = 0; j <= MAX_VAL; ++j) + sum += P[i].n[j]; + P[i].tot = sum; + for (sum = j = 0; j <= MAX_VAL; ++j) { + sum += P[i].n[j]; + if (sum >= 0.98*P[i].tot) + break; + } + last_j = MAX(last_j, j); + } + + + ++last_j; + // print to stdout the output for graphing; not broken into short lines + for (j = 0; j < last_j; ++j) + printf("\t%3d", j); + putchar('\n'); + for (i = 0; i < nP; ++i) { + printf("%s", P[i].name); + for (sum = j = 0; j < last_j; ++j) { + sum += P[i].n[j]; + printf("\t%4.2f", 100.0*(float)sum/(float)P[i].tot); + } + putchar('\n'); + } + + // print a user-friendly version to the named file + // <= 20 numbers per row + for (j = 0; j < last_j; j += 20) { + fprintf(gp, "\n "); + for (k = j; k < MIN(j+20, last_j); ++k) + fprintf(gp, "%3d", k); + for (i = 0; i < nP; ++i) { + fprintf(gp, "\n%10s", P[i].name); + for (k = j; k < MIN(j+20, last_j); ++k) { + P[i].sum += P[i].n[k]; + fprintf(gp, "%3lld", + MIN(99, 100*P[i].sum/P[i].tot)); + } + } + fprintf(gp,"\n\n"); + } +} + +int main(int argc, char **argv) { + FILE *fp; + char buf[MOST], *z = " \t\n", *p; + int X[MOST], i, j, cov, m, quality, is_pop; + + if (argc < 5) + fatal("args: SNP-file quality-value? out-name 13:fred ... 
"); + quality = atoi(argv[2]); + gp = ckopen(argv[3], "w"); + // record the individuals and populations + for (nI = 0, i = 4; i < argc; ++i, ++nI) { + if (nI >= MOST) + fatal("Too many individuals"); + // allow spaces in names + if ((p = strchr(argv[i], ':')) == NULL) + fatalf("no colon: %s", argv[i]); + I[nI].col = atoi(argv[i]); + for (j = 0; j < nP && !same_string(p+1, P[j].name); ++j) + ; + if (j == nP) { // new population + is_pop = 1; + P[nP++].name = copy_string(p+1); + } + I[nI].pop = j; + } + if (is_pop && quality) + fatal("quality values for a population are not supported."); + + // Record the number of SNPs with coverage 0, 1, ..., MAX_VAL-1, + // or >= MAX_VAL for each individual. + fp = ckopen(argv[1], "r"); + while (fgets(buf, MOST, fp)) { + if (buf[0] == '#') + continue; + // P[i].cov is the total coverage for all individuals in pop i + for (i = 0; i < nP; ++i) + P[i].cov = 0; + // X[i] = atoi(i-th word base-1) + for (i = 1, p = strtok(buf, z); p != NULL; + ++i, p = strtok(NULL, z)) + X[i] = atoi(p); + for (i = 0; i < nI; ++i) { + m = I[i].col; + if (quality) + cov = X[m+3]; + else + cov = X[m] + X[m+1]; + P[I[i].pop].cov += cov; + } + for (i = 0; i < nP; ++i) + P[i].n[MIN(P[i].cov, MAX_VAL)]++; + } + + // Print the distributions. + print_cov(); + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/dist_mat.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,196 @@ +/* dist_mat -- create a distance matrix in PHYLIP format for pairs of +* specified individuals, including by default the reference sequence +* +* argv[1] -- a Galaxy SNP table +* argv[2] -- min coverage +* argv[3] -- min quality +* argv[4] -- name of reference species (or "none") +* argv[5] -- 0 = distance from coverage; 1 = distance from genotype +* argv[6] -- name of file for the numbers of informative SNPs +* argv[7] -- name of file to write the Mega-format distance matrix +* argv[k] for k > 7 have the form "13:fred", meaning that the 13th and 14th +* columns (base 0) give the allele counts for the individual or group named +* "fred". + +What it does on Galaxy +This tool uses the selected SNP table to determine a "genetic distance" between each pair of selected individuals; the table of pairwise distances can be used by the Neighbor-Joining methods to construct a tree that depicts how the individuals are related. For a given pair of individuals, we find all SNP positions where both individuals have at least a minimum number of sequence "reads"; the individuals' distance at that SNP is defined as the absolute value of difference in the frequency of the first allele (equivalently: the second allele). For instance, if the first individuals has 5 reads of each allele and the second individual has respectivley 3 and 6 reads, then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that SNP (provided that the minimum read total is at most 9). The output includes a report of the numbers of SNPs passing that thresold for each pair of individuals. 
+ +*/ + +#include "lib.h" + +// bounds line length for a line of the Galaxy table + +#define MOST 5000 +#define MIN_SNPS 3 + +struct argument { + int column; + char *name; +} A[MOST]; +int nA; // number of individuals or groups + 1 (for the reference species) + +#define MOST_INDIVIDUALS 100 +#define SIZ 1+MOST_INDIVIDUALS // includes the reference + +double tot_diff[SIZ][SIZ]; +int ndiff[SIZ][SIZ], X[MOST]; + +int main(int argc, char **argv) { + FILE *fp, *gp, *mega; + char *p, *z = "\t\n", buf[MOST], name[100], B[100], C[100], D[100], + *nucs = "ACGT"; + int i, j, m, n, min_coverage, too_few, ref_allele = -1, has_ref, + min_quality, genotype; + double fi, fj, dist; + + if (argc < 8) + fatal("args: Galaxy-table min-cov min-qual min-snp ref-name genotype dist-out mega-out 13:fred 16:mary ..."); + min_coverage = atoi(argv[2]); + min_quality = atoi(argv[3]); + if (min_coverage <= 0 && min_quality <= 0) + fatal("coverage and/or quality of SNPs should be constrained"); + + if (same_string(argv[4], "none")) + has_ref = 0; + else { + has_ref = 1; + A[0].name = copy_string(argv[4]); + } + genotype = atoi(argv[5]); + gp = ckopen(argv[6], "w"); + mega = ckopen(argv[7], "w"); + fprintf(mega, "#mega\n!Title: Galaxy;\n"); + + for (nA = has_ref, i = 8; i < argc; ++i, ++nA) { + if (nA >= SIZ) + fatal("Too many individuals"); + if (sscanf(argv[i], "%d:%s", &(A[nA].column), name) != 2) + fatalf("bad arg: %s", argv[i]); + A[nA].name = copy_string(name); + } + fprintf(mega, + "!Format DataType=Distance DataFormat=LowerLeft NTaxa=%d;\n\n", + nA); + for (i = 0; i < nA; ++i) + fprintf(mega, "[%d] #%s\n", i+1, A[i].name); + fprintf(mega, "\n\n\n["); + for (i = 1; i <= nA; ++i) + fprintf(mega, "%4d", i); + fprintf(mega, " ]\n"); + fp = ckopen(argv[1], "r"); + while (fgets(buf, MOST, fp)) { + if (buf[0] == '#') + continue; + if (has_ref) { + // get the reference allele + if (sscanf(buf, "%*s %*s %s %s %*s %*s %*s %s", B, C, D) + != 3) + fatalf("3 fields: %s", buf); + if (strchr(nucs, B[0]) 
== NULL || + strchr(nucs, C[0]) == NULL) + fatalf("not nucs : %s %s", B, C); + if (D[0] == B[0]) + ref_allele = 1; + else if (D[0] == C[0]) + ref_allele = 2; + else if (strchr(nucs, D[0]) != NULL) + ref_allele = 3; + else { + if (D[0] != '-' && D[0] != 'N') + fatalf("what is this: %s", D); + ref_allele = -1; + } + } + + // X[i] = atoi(i-th word base-1) + for (i = 1, p = strtok(buf, z); p != NULL; + ++i, p = strtok(NULL, z)) + X[i] = atoi(p); + for (i = has_ref; i < nA; ++i) { + m = A[i].column; + if (X[m] + X[m+1] < min_coverage || + X[m+3] < min_quality) + continue; + + // frequency of the second allele + if (genotype) { + if (X[m+2] == -1) + continue; // no genotype + fi = (double)X[m+2]; + } else + fi = (double)X[m+1] / (double)(X[m]+X[m+1]); + if (has_ref && ref_allele > 0) { + ndiff[0][i]++; + // reference allele might be different from both + if (ref_allele == 1) + tot_diff[0][i] += fi; + else if (ref_allele == 2) + tot_diff[0][i] += (1.0 - fi); + else + tot_diff[0][i] += 1.0; + } + for (j = i+1; j < nA; ++j) { + n = A[j].column; + if (X[n] + X[n+1] < min_coverage || + X[n+3] < min_quality) + continue; + if (genotype && X[n+2] == -1) + continue; + ndiff[i][j]++; + if (genotype) + fj = (double)X[n+2]; + else + fj = (double)X[n+1] / + (double)(X[n] + X[n+1]); + fj -= fi; + // add abs. value of difference in frequencies + tot_diff[i][j] += (fj >= 0.0 ? 
fj : -fj); + } + + } + } + for (i = too_few = 0; i < nA; ++i) + for (j = i+1; j < nA; ++j) + if (ndiff[i][j] < MIN_SNPS) { + too_few = 1; + fprintf(stderr, + "%s and %s have only %d informative SNPs\n", + A[i].name, A[j].name, ndiff[i][j]); + } + if (too_few) + fatal("remove individuals or relax constraints"); + + // print distances + printf("%d\n", nA); + for (i = 0; i < nA; ++i) { + printf("%9s", A[i].name); + fprintf(mega, "[%d] ", i+1); + for (j = 0; j < i; ++j) { + dist = tot_diff[j][i]/(double)ndiff[j][i]; + printf(" %6.4f", dist); + fprintf(mega, " %6.4f", dist); + } + fprintf(mega, " \n"); + printf(" 0.0000"); + for (j = i+1; j < nA; ++j) + printf(" %6.4f", + tot_diff[i][j]/(double)ndiff[i][j]); + putchar('\n'); + } + fprintf(mega, "\n\n\n\n\n"); + fclose(mega); + + // print numbers of SNPs + for (i = 0; i < nA; ++i) { + fprintf(gp, "%9s", A[i].name); + for (j = 0; j < i; ++j) + fprintf(gp, " %8d", ndiff[j][i]); + fprintf(gp, " 0"); + for (j = i+1; j < nA; ++j) + fprintf(gp," %8d", ndiff[i][j]); + putc('\n', gp); + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/dpmix.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,510 @@ +/* dpmix -- admixture using dynamic programming +* +* argv[1] = a Galaxy SNP table. For each of several individuals, the table +* has four columns (#A, #B, genotype, quality) -- SNPs on the same +* chromosome must appear together, and in order of position +* argv[2] = column with the chromosome name (position is the next column) +* argv[3] = "all" or e.g., "chr20" +* argv[4] = 1 if ancestral allele frequencies are estimated from SAMtools +* genotypes; 0 means use read-coverage data. +* argv[5] = 1 to add logarithms of probabilities, allowing unobserved alleles, +* 0 to simply add probabilities +* argv[6] = switch penalty (>= 0) +* argv[7] = file giving heterochromatic intervals ('-' means that no file is +* given) +* argv[8] = file name for additional output +* argv[9], argv[10], ..., have the form "13:1:Peter", "13:2:Paul" or +* "13:0:Mary", meaning that the 13th and 14th columns (base 1) +* give the allele counts for an individual that is in ancestral +* population 1, ancestral population 2, or is a potentially admixed +* individual, resp. + +What it does on Galaxy +The user specifies two "ancestral" populations (i.e., sources for chromosomes) and a set of potentially admixed individuals, and chooses between the sequence coverage or the estimated genotypes to measure the similarity of genomic intervals in admixed individuals to the two classes of ancestral chromosomes. The user also picks a "switch penalty", typically between 10 and 100. For each potentially admixed individual, the program divides the genome into three "genotypes": (0) homozygous for the second ancestral population (i.e., both chromosomes from that population), (1) heterozygous, or (2) homozygous for the first ancestral population. Parts of a reference chromosome that are labeled as "heterochromatic" are given the non-genotype, 3.
Smaller values of the switch penalty (corresponding to more ancient admixture events) generally lead to the reconstruction of more frequent changes between genotypes. +*/ + +#include "lib.h" +//#include <math.h> + +// maximum length of a line from the table +#define MOST 5000 + +// we create a linked list of "events" on a chromosome -- mostly SNPs, but +// also ends of hetorochomatic intervals +struct snp { + double F1, F2; // reference allele frequencies in the two populations + int pos, *g, // position and an array of admixed genotypes + type; // 0 = SNP, 1 = start of het. interval, 2 = end + struct snp *prev; // we keep the list in order of decreasing pos +} *last; + +// array of potentially admixed individuals +struct admixed { + char *name; + int gcol, ge20, gt02; + long long x[4]; // number of reference bp in each state +} A[MOST]; + +// information about "ancestral" individuals, namely column and population +struct ances { + int col, pop; + char *name; +} C[MOST]; + +// heterochromatic intervals +struct het { + char *chr; + int b, e; +} H[MOST]; + +// global variables +int *B[4], // backpointer to state at the previous SNP (or event) + *P; // chromosome position +int nH, nI, nG, genotypes, nsnp, debug, chr_col, logs; +char this_chr[100]; +double switch_penalty; +char buf[MOST], *status; +FILE *fp, *out; + +// probability of producing genotype g in admixture state s +// given reference allele frequencies f1 and f2 in the ancestral populations +double score (double f1, double f2, int g, int s) { + double p; + + if (s == 2) { // homozygous for the first ancestral population + if (g == 2) + p = f1*f1; + else if (g == 0) + p = (1.0-f1)*(1.0-f1); + else + p = 2.0*f1*(1.0-f1); + } else if (s == 0) { // homozygous for the second ancestral population + if (g == 2) + p = f2*f2; + else if (g == 0) + p = (1.0-f2)*(1.0-f2); + else + p = 2.0*f2*(1.0-f2); + } else { // one chromosome from each ancestral population + if (s != 1) + fatalf("bad state %d", s); + if (g == 2) + 
p = f1*f2; + else if (g == 0) + p = (1.0-f1)*(1.0-f2); + else + p = f1*(1.0-f2) + (1.0-f1)*f2; + } + + if (p < 0.0) + fatalf("%f %f %d %d => %f", f1, f2, g, s, p); + if (!logs) + return p; +#ifdef NEVER + if (p == 0.0) + return -5.0; + p = log(p); + if (p < -5.0) + p = -5.0; + return p; +#endif + fatal("dpmix: cannot happen"); +} + +char *get_chr_name() { + static char tmp[MOST]; + char *s, *z = "\t\n"; + int i = chr_col; + + strcpy(tmp, buf); + s = strtok(tmp, z); + while (--i > 0) + s = strtok(NULL, z); + return s; +} + +/* Process the a-th potentially admixed individual. +* We think of a graph with nodes (event, state) for each event (SNP or +* end-point of a heterochromatic interval on the current chromosome) and state +* = 0, 1, 2, 3 (corresponding to genotypes 0, 1, and 2, plus 3 = +* heterochromatin); for events other than the last one, there are edges from +* each (event, state) to (event+1, k) for 0 <= k <= 3. An edge (event, j) to +* (event+1, k) has penalty 0 if j = k and penalty switch_penalty otherwise. +* The bonus at SNP node (event, state) for 0 <= state <= 2 is the probability +* of generating the genotype observed in the a-th potentially admixed +* individual given the allele frequences in the two ancestral populations and +* the assumed admixture state in this region of the chromosome. The score of a +* path is the sum of the node bonuses minus the sum of the edge penalties. +* +* Working backwards through the events, we compute the maximum path score, +* from[state], from (event,state) back to the closest admixed interval. +* To force paths to reach state 3 at an event signalling the start of a +* heterochromatic interval (type = 1), but to avoid state 3 at other events, +* we assign huge but arbitrary negative scores (see "avoid", below). +* At (event,state), B[event][state] is the backpointer to the state at +* event+1 on an optimal path. Finally, we follow backpointers to partition +* the chromosome into admixture states. 
+*/ +void one_admix(int a) { + int i, j, m, state, prev_pos, b; + double from[4], f[4], ff[4], avoid = -1000000.0; + struct snp *p; + + // from[i] = highest score of a path from the current event + // (usually a SNP) to the next (to the right) heterochromatic interval + // or the end of the chromosome. The score of the path is the sum of + // SNP scores minus (switch_penalty times number of state switches). + // We assume that the last two event on the chromosome are the start + // and end of a heterochromatic interval (possibly of length 0)/ + for (i = 0; i < 4; ++i) + from[i] = 0; + for (i = nsnp-1, p = last; i >= 0 && p != NULL; --i, p = p->prev) { + for (state = 0; state < 4; ++state) { + // find highest path-score from this event onward + for (m = j = 0; j < 4; ++j) { + f[j] = from[j]; + if (j != state) + f[j] -= switch_penalty; + //if (abs(j-state) == 2) + //from[j] -= switch_penalty; + if (f[j] > f[m]) + m = j; + } + B[state][i] = m; + ff[state] = f[m]; + if (state < 3 && p->type == 0) + ff[state] += + score(p->F1, p->F2, p->g[a], state); + } + if (p->type == 1) { + // start of heterochomatic interval. 
Force paths + // reaching this point to go through state 3 + from[3] = 0; + from[0] = from[1] = from[2] = avoid; + } else { + for (j = 0; j < 3; ++j) + from[j] = ff[j]; + from[3] = avoid; + } + if (debug) + fprintf(stderr, "%d: %f(%d) %f(%d) %f(%d) %f(%d)\n", + i, from[0], B[0][i], from[1], B[1][i], from[2], + B[2][i], from[3], B[3][i]); + } + + // find the best initial state + for (state = 0, j = 1; j < 4; ++j) + if (from[j] > from[state]) + state = j; + + // trace back to find the switch points + // A[a].x[state] records the total length of intervals in each state + for (prev_pos = i = 0; i < nsnp; ++i) { + if ((b = B[state][i]) != state) { + if (prev_pos < P[i+1]-1) + printf("%s\t%d\t%d\t%d\t%s\n", + this_chr, prev_pos, P[i+1], state, A[a].name); + A[a].x[state] += (P[i+1]-prev_pos); + prev_pos = P[i+1]; + state = b; + } + } +} + +// Add a heterochromatic interval to the SNP list, where type = 1 signifies +// the start of the interval, 2 signifies the end. +void add_het(int b, int type) { + struct snp *new = ckalloc(sizeof(struct snp)); + int i; + + new->F1 = new->F2 = 0.0; + new->pos = b; + new->type = type; + new->g = ckalloc(nG*sizeof(int)); + for (i = 0; i < nG; ++i) + new->g[i] = 0; + new->prev = last; + last = new; +} + +/* Process one chromosome. Read the SNPs on the chromosome (the first one is +* already in the buf). Boil each SNP down to the contents of a SNP entry +* (pos, F1, F2, g[]) and put it in the linked list. Also, intersperse the +* "events" corresponding to the start and end of a heterochromatic interval. +* Then call the dynamic-programming routine for each potentially admixed +* individual. 
+*/ +void one_chr() { + char *s, *z = "\t\n"; + int X[MOST], n, i, g, A1, B1, A2, B2, a, do_read, p, pos, het; + struct snp *new; + double F1, F2; + + strcpy(this_chr, get_chr_name()); + nsnp = 0; + last = NULL; + // advance to this chromosome in the list of heterochromatic intervals + for (het = 0; het < nH && !same_string(this_chr, H[het].chr); ++het) + ; + // loop over the SNPs on the current chromosome + for (do_read = 0; ; do_read = 1) { + if (do_read && (status = fgets(buf, MOST, fp)) == NULL) + break; + if (!same_string(get_chr_name(), this_chr)) + break; + + // set X[i] = atoi(i-th word of buf), i is base 1 + for (i = 1, s = strtok(buf, z); s != NULL; + ++i, s = strtok(NULL, z)) + X[i] = atoi(s); + + // insert events (pseudo-SNPs) for heterochomatin intervals + // coming before the SNP + pos = X[chr_col+1]; + while (het < nH && same_string(this_chr, H[het].chr) && + H[het].b < pos) { + add_het(H[het].b, 1); + add_het(H[het].e, 2); + nsnp+= 2; + ++het; + } + + // should we discard this SNP? 
+ if (pos == -1) // SNP not mapped to the reference + continue; + for (i = 0; i < nG && X[A[i].gcol] >= 0; ++i) + ; + if (i < nG) // genotype of admixed individual not called + continue; + + // add SNP to a "backward pointing" linked list, recording the + // major allele frequencies in the two reference populations + // and genotypes in the potential admixed individuals + for (i = A1 = B1 = A2 = B2 = 0; i < nI; ++i) { + n = C[i].col; + p = C[i].pop; + if (genotypes) { + g = X[n+2]; + if (g == -1) + continue; + if (g < 0 || g > 2) + fatalf("invalid genotype %d", g); + if (p == 1) { + A1 += g; + B1 += (2 - g); + } else if (p == 2) { + A2 += g; + B2 += (2 - g); + } + } else { // use read counts + if (p == 1) { + A1 += X[n]; + B1 += X[n+1]; + } else if (p == 2) { + A2 += X[n]; + B2 += X[n+1]; + } + } + } + if (A1+B1 == 0 || A2+B2 == 0) + continue; + ++nsnp; + new = ckalloc(sizeof(struct snp)); + new->pos = X[chr_col+1]; + new->F1 = F1 = (double)A1/(double)(A1+B1); + new->F2 = F2 = (double)A2/(double)(A2+B2); + new->type = 0; + new->g = ckalloc(nG*sizeof(int)); + for (i = 0; i < nG; ++i) { + g = new->g[i] = X[A[i].gcol]; + if (score(F1, F2, g, 2) >= score(F1, F2, g, 0)) + A[i].ge20++; + else + A[i].gt02++; + } + if (F1 < 0.0 || F1 > 1.0) + fatalf("F1 = %f (A1 = %d, B1 = %d) at snp %d", + F1, A1, B1, nsnp); + if (F2 < 0.0 || F2 > 1.0) + fatalf("F2 = %f (A2 = %d, B2 = %d) at snp %d", + F2, A2, B2, nsnp); + new->prev = last; + last = new; + } + // insert heterochomatin intervals that follow all SN + while (het < nH && same_string(this_chr, H[het].chr)) { + add_het(H[het].b, 1); + add_het(H[het].e, 2); + nsnp += 2; + ++het; + } +/* +printf("nsnp = %d\n", nsnp); +for (i = nsnp-1, new = last; i >= 0 && new != NULL; --i, new = new->prev) { +printf("%d %d ", new->pos, new->type); +printf("%g %g ", new->F1, new->F2); +for (a = 0; a < nG; ++a) +printf("%d", new->g[a]); +putchar('\n'); +} +//exit(0); +printf("\nbacktrace\n"); +*/ + + // allocate arrays for the DP analysis + P = 
ckalloc(nsnp*sizeof(int)); // position of each event + for (i = nsnp-1, new = last; i >= 0 && new != NULL; + --i, new = new->prev) + P[i] = new->pos; + + for (i = 0; i < 4; ++i) { // space for back-pointers + B[i] = ckalloc((nsnp+1)*sizeof(int)); + B[i][nsnp] = 0; + } + + // loop over possibly admixed individuals + for (a = 0; a < nG; ++a) + one_admix(a); + + // free the allocated storage + while (last != NULL) { + new = last; + last = last->prev; + free(new->g); + free(new); + } + free(P); + for (i = 0; i < 4; ++i) + free(B[i]); +} + +int main(int argc, char **argv) { + int n, i, j, k, saw[3]; + long long het_len, ref_len; + float N; + char nam[100], *chr; + + if (argc < 9) + fatal("args: table chr-col chr data-source logs switch heterochrom outfile n:1:name1 m:2:name2 ..."); + if (same_string(argv[argc-1], "debug")) { + debug = 1; + --argc; + } + + // handle command-line arguments + chr_col = atoi(argv[2]); + chr = argv[3]; + genotypes = atoi(argv[4]); + + logs = atoi(argv[5]); + if (logs) + fatal("logarithms of probabilities -- under development"); + //if (logs) switch_penalty = log(switch_penalty); + + switch_penalty = atof(argv[6]); + if (switch_penalty < 0.0) + fatal("negative switch penalty"); + out = ckopen(argv[8], "w"); + + het_len = ref_len = 0; + if (!same_string(argv[7], "-")) { + fp = ckopen(argv[7], "r"); + while (fgets(buf, MOST, fp)) { + if (nH >= MOST) + fatal("Too many heterochromatic intervals"); + if (sscanf(buf, "%s %d %d", nam, &i, &j) != 3) + fatalf("gagging: %s", buf); + H[nH].chr = copy_string(nam); + H[nH].b = i; + H[nH].e = j; + // assumes last event per chrom. is a het. 
interval + if (nH > 0 && !same_string(nam, H[nH-1].chr)) + ref_len += j; + het_len += (j - i); + ++nH; + } + fclose(fp); + } + ref_len += H[nH-1].e; + + // populations must be disjoint + saw[1] = saw[2] = 0; + for (i = 9; i < argc; ++i) { + if (sscanf(argv[i], "%d:%d:%s", &j, &k, nam) != 3) + fatalf("not like 13:2:fred : %s", argv[i]); + if (k < 0 || k > 2) + fatalf("not population 0, 1 or 2: %s", argv[i]); + saw[k] = 1; + + // seen this individual (i.e., column) before?? + for (n = 0; n < nI && C[n].col != j; ++n) + ; + if (n < nI) + fatal("populations are not disjoint"); + if (k == 0) { // admixed individual + if (nG >= MOST) + fatal("Too many admixed individuals"); + A[nG].name = copy_string(nam); + A[nG++].gcol = j+2; + } else { // in an ancestral population + if (nI >= MOST) + fatal("Too many ancestral individuals"); + C[nI].col = j; + C[nI].pop = k; + C[nI++].name = copy_string(nam); + } + } + if (saw[0] == 0) + fatal("no admixed individual is specified"); + if (saw[1] == 0) + fatal("first reference population is empty"); + if (saw[2] == 0) + fatal("second reference population is empty"); + + // start the output file of text + for (k = 1; k <= 2; ++k) { + fprintf(out, "state %d agrees with:", k == 1 ? 
2 : 0); + for (i = 0; i < nI; ++i) + if (C[i].pop == k) + fprintf(out, " %s", C[i].name); + putc('\n', out); + } + putc('\n', out); + + fp = ckopen(argv[1], "r"); + while ((status = fgets(buf, MOST, fp)) != NULL && buf[0] == '#') + ; + if (same_string(chr, "all")) + while (status != NULL) + one_chr(); + else { // skip to the specified chromosome + while (!same_string(chr, get_chr_name()) && + (status = fgets(buf, MOST, fp)) != NULL) + ; + if (status != NULL) + one_chr(); + } + for (i = 0; i < nG; ++i) { + fprintf(out, + "%s: %d SNPs where state 2 is at least as likely as state 0\n", + A[i].name, A[i].ge20); + fprintf(out, + "%s: %d SNPs where state 0 is more likely than state 2\n\n", + A[i].name, A[i].gt02); + } + // write fractions in each state to the output text file + + if (ref_len) + fprintf(out, + "%lld of %lld reference bp (%1.1f%%) are heterochromatin\n\n", + het_len, ref_len, 100.0*(float)het_len/(float)ref_len); + + for (i = 0; i < nG; ++i) { + N = (float)(A[i].x[0] + A[i].x[1] + A[i].x[2])/100.0; + fprintf(out, "%s: 0 = %1.1f%%, 1 = %1.1f%%, 2 = %1.1f%%\n", + A[i].name, (float)A[i].x[0]/N, (float)A[i].x[1]/N, + (float)A[i].x[2]/N); + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/eval2pct.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,26 @@ +#include "lib.h" + +#define MAX_EVAL 1000 + +float E[MAX_EVAL]; +int nE; + +int main (int argc, char **argv) { + FILE *fp; + char buf[500]; + int i; + float tot; + + fp = (argc== 1 ? stdin : ckopen(argv[1], "r")); + while (fgets(buf, 500, fp)) { + if (nE >= MAX_EVAL) + fatal("Too many eigenvalues"); + E[nE++] = atof(buf); + } + for (tot = 0.0, i = 0; i < nE; ++i) + tot += E[i]; + printf("Percentage explained by eigenvectors:\n"); + for (i = 0 ; i < nE && E[i] > 0.0; ++i) + printf("%d: %1.1f%%\n", i+1, 100.0*(float)E[i]/tot); + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/lib.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,71 @@ +// lib.c -- a little library of C procudures + +#include "lib.h" + +char *argv0; + +/* print_argv0 ---------------------------------------- print name of program */ +void print_argv0(void) +{ + if (argv0) { + char *p = strrchr(argv0, '/'); + (void)fprintf(stderr, "%s: ", p ? p+1 : argv0); + } +} + +/* fatal ---------------------------------------------- print message and die */ +void fatal(const char *msg) +{ + fatalf("%s", msg); +} + +/* fatalf --------------------------------- format message, print it, and die */ +void fatalf(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + fflush(stdout); + print_argv0(); + (void)vfprintf(stderr, fmt, ap); + (void)fputc('\n', stderr); + va_end(ap); + exit(1); +} + +/* ckopen -------------------------------------- open file; check for success */ +FILE *ckopen(const char *name, const char *mode) +{ + FILE *fp; + + if ((fp = fopen(name, mode)) == NULL) + fatalf("Cannot open %s.", name); + return fp; +} + +/* ckalloc -------------------------------- allocate space; check for success */ +void *ckalloc(size_t amount) +{ + void *p; + + if ((long)amount < 0) /* was "<= 0" -CR */ + fatal("ckalloc: request for negative space."); + if (amount == 0) + amount = 1; /* ANSI portability hack */ + if ((p = malloc(amount)) == NULL) + fatalf("Ran out of memory trying to allocate %lu.", + (unsigned long)amount); + return p; +} + +/* same_string ------------------ determine whether two strings are identical */ +bool same_string(const char *s, const char *t) +{ + return (strcmp(s, t) == 0); +} + +/* copy_string ---------------------- save string s somewhere; return address */ +char *copy_string(const char *s) +{ + char *p = ckalloc(strlen(s)+1); /* +1 to hold '\0' */ + return strcpy(p, s); +}
// file: genome_diversity/src/lib.h
// lib.h -- header file for some useful procedures

// Include guard: without it, a second inclusion (directly or through another
// header) repeats the typedefs below.
#ifndef GENOME_DIVERSITY_LIB_H
#define GENOME_DIVERSITY_LIB_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>	/* INT_MAX, INT_MIN, LONG_MAX, LONG_MIN, etc. */
#include <stdarg.h>

typedef unsigned char uchar;
// "bool" is a keyword in C++ and in C from C23 on, where this typedef would
// not compile; keep the historical int-based definition everywhere else.
#if !defined(__cplusplus) && \
    (!defined(__STDC_VERSION__) || __STDC_VERSION__ <= 201710L)
typedef int bool;
#endif

// program name used as a prefix by print_argv0(); NULL means "no prefix"
extern char *argv0;

void print_argv0(void);
// NOTE(review): fatalfr is declared here but no definition appears in the
// lib.c shown with this header -- confirm it is defined elsewhere.
#ifdef __GNUC__     /* avoid some "foo might be used uninitialized" warnings */
	void fatal(const char *msg) __attribute__ ((noreturn));
	void fatalf(const char *fmt, ...) __attribute__ ((noreturn));
	void fatalfr(const char *fmt, ...) __attribute__ ((noreturn));
#else
	void fatal(const char *msg);
	void fatalf(const char *fmt, ...);
	void fatalfr(const char *fmt, ...);
#endif
FILE *ckopen(const char *name, const char *mode);
void *ckalloc(size_t amount);
bool same_string(const char *s, const char *t);
char *copy_string(const char *s);

// NOTE: these macros evaluate their arguments more than once
#undef MAX
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#undef MIN
#define MIN(x,y) ((x) < (y) ? (x) : (y))

#endif /* GENOME_DIVERSITY_LIB_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/pop.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,76 @@ +/* pop -- add four columns (allele counts, genotype, maximum quality) for a +* specified population to a Galaxy SNP table, or enforce bounds +* +* argv[1] = file containing a Galaxy table +* argv[2] = lower bound on total coverage (-1 = no lower bound) +* argv[3] = upper bound on total coverae (-1 if no bound) +* argv[4] = lower bound on individual coverage (-1 = no bound) +* argv[5] = lower bound on individual quality value (-1 = no bound) +* argv[6] ... are the starting columns (base-1) for the chosen individuals + +What it does on Galaxy +The user specifies that some of the individuals in the selected SNP table are form a "population" that has been previously defined using the Galaxy tool to select individuals from a SNP table. One option is for the program to append four columns to the table, giving the total counts for the two alleles, the "genotype" for the population and the maximum quality value, taken over all indivuals in the population. If all defined genotypes in the population are 2 (agree with the reference), the population's genotype is 2; similarly for 0; otherwise the genoype is 1 (unless all individuals have undefined genotype, in which case it is -1. The other option is to remove rows from the table for which the total coverage for the population is either too low or too high, and/or if the individual coverage or quality value is too low. 
+*/ + +#include "lib.h" + +// most characters allowed in a row of the table +#define MOST 50000 + +// column for the relevant individuals/groups +int col[MOST]; +int nI; + +int main(int argc, char **argv) { + FILE *fp; + char *p, *z = "\t\n", buf[MOST], trash[MOST]; + int X[MOST], m, i, A, B, G, Q, lo, hi, indiv, qual, g, q; + + if (argc < 3) + fatalf("args: SNP-table low high col1 col2 ..."); + + lo = atoi(argv[2]); + hi = atoi(argv[3]); + indiv = atoi(argv[4]); + qual = atoi(argv[5]); + for (i = 6, nI = 0; i < argc; ++i, ++nI) + col[nI] = atoi(argv[i]); + + fp = ckopen(argv[1], "r"); + while (fgets(buf, MOST, fp)) { + if (buf[0] == '#') + continue; + strcpy(trash, buf); + // set X[i] = atoi(i-th word of s), i is base 0 + for (i = 1, p = strtok(trash, z); p != NULL; + ++i, p = strtok(NULL, z)) + X[i] = atoi(p); + for (i = A = B = Q = 0, G = -1; i < nI; ++i) { + m = col[i]; + if (X[m]+X[m+1] < indiv || (q = X[m+3]) < qual) + break; + A += X[m]; + B += X[m+1]; + g = X[m+2]; + if (g != -1) { + if (G == -1) // first time + G = g; + else if (G != g) + G = 1; + } + Q = MAX(Q, q); + } + if (i < nI) // check bounds on the population's individuals + continue; + if (lo == -1 && hi == -1 && indiv == -1 && qual == -1) { + // add columns + if ((p = strchr(buf, '\n')) != NULL) + *p = '\0'; + printf("%s\t%d\t%d\t%d\t%d\n", buf, A, B, G, Q); + } else if (A+B >= lo && (hi == -1 || A+B <= hi)) + // coverage meets the population-level restrictions + printf("%s", buf); + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genome_diversity/src/sweep.c Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,279 @@ +/* sweep -- find regions of the genome with high scores (e.g., Fst scores). +* +* argv[1] -- file containing a Galaxy table +* argv[2] -- column number (base-1) for the chromosome name +* argv[3] -- column number for the (base-0) chromosomal position +* argv[4] -- column number for a score for the position +* argv[5] -- a percentage, such as "95", or a raw score, such as "=0.9". +* argv[6] -- the number of randomizations (shuffles) of the scores +* argv[7] -- [optional] if present and non-zero, report SNPs +* +* The program first determines a threshold such that the stated percentage +* of the scores are below that threshold (or uses the provided number if +* argv[5] starts with "="). The program subtracts the threshold +* from each score, then looks for maximal-scoring runs of SNPs, i.e., where +* adding or subtracting SNPs from an end of then run always decreases the +* total score. These regions are printed in order of descreasing total score. +* To determine a cutoff for the printed regions, the programs takes the maximum +* score over all regions observed in a specified number of shuffles of the +* list of scores. If argv[6] = 0, then all maximal-scoring runs of at least +* 4 table entries are printed. + +What it does on Galaxy +The user selects a SNP table and specifies the columns containing (1) chromosome, (2) position, (3) scores (such as an Fst-value for the SNP), (4) a percentage or raw score for the "cutoff" and (5) the number of times the data should be radomized (only intervals with score exceeding the maximum for the randomized data are reported). If a percentage (e.g. 95%) is specified for #3, then that percentile of the scores is used as the cutoff; this may not work well if many SNPs have the same score. 
The program subtracts the cutoff from every score, then finds genomic intervals (i.e., consecutive runs of SNPs) whose total score cannot be increased by adding or subtracting one or more SNPs at the ends of the interval. +*/ + +#include "lib.h" +#include "Huang.h" + +// maximum number of rows in any processed table +#define MANY 20000000 +#define BUF_SIZE 5000 +#define MAX_WINDOW 1000000 + +double X[MANY]; // holds all scores +int nX; + +// position-score pairs for a single chromosome +struct score { + int pos; + double x; // original score, then shifted score +} S[MANY]; +int nS; + +struct snp { + int pos; + double x; + struct snp *next; +}; + +// structure to hold the maximum-scoring chromosomal intervals +struct sweep { + float score; + char *chr; + int b, e; + struct snp *snps; +} W[MAX_WINDOW]; +int nW; + +// return the linked list of SNPs in positions b to e +struct snp *add_snps(int b, int e) { + struct snp *first = NULL, *last = NULL, *new; + int i; + for (i = b; i <= e; ++i) + if (S[i].pos >= 0) { + new = ckalloc(sizeof(*new)); + new->pos = S[i].pos; + new->x = S[i].x; + new->next = NULL; + if (first == NULL) + first = new; + else + last->next = new; + last = new; + } + return first; +} + +// given a table row, return a pointer to the item in a particular column +char *get_col(char *buf, int col) { + static char temp[BUF_SIZE], *p; + int i; + char *z = " \t\n"; + + strcpy(temp, buf); + for (p = strtok(temp, z), i = 1; *p && i < col; + p = strtok(NULL, z), ++i) + ; + if (p == NULL) + fatalf("no column %d in %s", col, buf); + return p; +} + +// fill S[] with position-score pairs for the next chromosome +// return 0 for EOF +int get_chr(FILE *fp, int chr_col, int pos_col, int score_col, char *chr) { + static char buf[BUF_SIZE]; + static int init = 1; + char *status; + + if (init) { + while ((status = fgets(buf, BUF_SIZE, fp)) != NULL && + buf[0] == '#') + ; + if (status == NULL) + fatal("empty table"); + init = 0; + } + if (buf[0] == '\0') + return 0; + + if 
(buf[0] == '#') + fatal("cannot happen"); + strcpy(chr, get_col(buf, chr_col)); + S[0].pos = atoi(get_col(buf, pos_col)); + S[0].x = atof(get_col(buf, score_col)); + for (nS = 1; ; ++nS) { + if (!fgets(buf, BUF_SIZE, fp)) { + buf[0] = '\0'; + return 1; + } + if (!same_string(chr, get_col(buf, chr_col))) + break; + S[nS].pos = atoi(get_col(buf, pos_col)); + S[nS].x = atof(get_col(buf, score_col)); + } + return 1; +} + +// for sorting genomic intervals by *decreasing* score +int Wcompar(struct sweep *a, struct sweep *b) { + float y = a->score, z = b->score; + + if (y > z) + return -1; + if (y < z) + return 1; + return 0; +} + +// for sorting an array of scores into increasing order +int fcompar(double *a, double *b) { + if (*a < *b) + return -1; + if (*a > *b) + return 1; + return 0; +} + +/* shuffle the values S[0], S[1], ... , S[nscores-1]; +* Uses Algorithm P in page 125 of "The Art of Computer Programming (Vol II) +* Seminumerical Programming", by Donald Knuth, Addison-Wesley, 1971. +*/ +void shuffle_scores() { + int i, j; + double temp; + + for (i = nX-1; i > 0; --i) { + // swap what's in location i with location j, where 0 <= j <= i + j = random() % (i+1); + temp = X[i]; + X[i] = X[j]; + X[j] = temp; + } +} + +// return the best interval score (R[i] is the struct operated by Huang()) +double best() { + int i; + double bestScore; + + Huang(X, nX); + + for (bestScore = 0.0, i = 1; i <= top; ++i) + bestScore = MAX(R[i].Score, bestScore); + return bestScore; +} + +int main(int argc, char **argv) { + FILE *fp; + char buf[BUF_SIZE], chr[100], *a; + double shift = 0.0, cutoff; + int i, b, e, chr_col, pos_col, score_col, nshuffle, snps = 0; + struct snp *s; + + if (argc != 7 && argc != 8) + fatal("args: table chr_col pos_col score_col threhold randomizations [SNPs]"); + + // process command-line arguments + chr_col = atoi(argv[2]); + pos_col = atoi(argv[3]); + score_col = atoi(argv[4]); + a = argv[5]; + fp = ckopen(argv[1], "r"); + if (argc == 8) + snps = 
atoi(argv[7]); + if (isdigit(a[0])) { + for (nX = 0; nX < MANY && fgets(buf, BUF_SIZE, fp); ) { + if (buf[0] == '#') + continue; + X[nX++] = atof(get_col(buf, score_col)); + } + if (nX == MANY) + fatal("Too many rows"); + qsort((void *)X, (size_t)nX, sizeof(double), + (const void *)fcompar); + shift = X[atoi(a)*nX/100]; + rewind(fp); + } else if (a[0] == '=') + shift = atof(a+1); + +//fprintf(stderr, "shift = %4.3f\n", shift); + nshuffle = atoi(argv[6]); + if (nshuffle == 0) + cutoff = 0; + else { + for (nX = 0; nX < MANY && fgets(buf, BUF_SIZE, fp); ) { + if (buf[0] == '#') + continue; + X[nX++] = atof(get_col(buf, score_col)) - shift; + } + if (nX == MANY) + fatal("Too many rows"); + for (cutoff = 0.0, i = 0; i < nshuffle; ++i) { + shuffle_scores(); + cutoff = MAX(cutoff, best()); + } + rewind(fp); + } +//fprintf(stderr, "cutoff = %4.3f\n", cutoff); + + // loop over chromosomes; + // start by getting the chromosome's scores + while (get_chr(fp, chr_col, pos_col, score_col, chr)) { + // subtract shift from the scores + for (i = 0; i < nS; ++i) + X[i] = S[i].x - shift; + + // find the maximum=scoring regions + Huang(X, nS); + + // save any regions with >= 4 points and score >= cutoff + for (i = 0; i <= top; ++i) { + if (nW >= MAX_WINDOW) + fatalf("too many windows"); + + // get indices of the first and last SNP in the interval + b = R[i].Lpos + 1; + e = R[i].Rpos; + + // remove unmapped SNP position from intervals' ends + while (b < e && S[b].pos == -1) + ++b; + while (e > b && S[e].pos == -1) + --e; + + // record intervals + if (e - b < 3 || R[i].Score < cutoff) + continue; + W[nW].score = R[i].Score; + W[nW].chr = copy_string(chr); + W[nW].b = S[b].pos; + W[nW].e = S[e].pos+1; // Ws are half-open + if (snps) + W[nW].snps = add_snps(b, e); + ++nW; + } + } + + // sort by decreasing score + qsort((void *)W, (size_t)nW, sizeof(W[0]), (const void *)Wcompar); + + for (i = 0; i < nW; ++i) { + printf("%s\t%d\t%d\t%4.4f\n", + W[i].chr, W[i].b, W[i].e, W[i].score); + for 
(s = W[i].snps; s; s = s->next) + printf(" %d %3.2f\n", s->pos, s->x); + } + return 0; +}
# file: lib/galaxy/datatypes/wsf.py
"""
SnpFile datatype
"""

import galaxy.datatypes.data
import tempfile
import os
import simplejson
from galaxy import util
from galaxy.datatypes.sniff import *
from galaxy.datatypes.tabular import Tabular
from galaxy.datatypes.images import Html
from galaxy.datatypes import metadata
from galaxy.datatypes.metadata import MetadataElement

class Wped( Html ):
    """Composite datatype made of a '.ped' and a '.map' file sharing one base name."""
    allow_datatype_change = False
    composite_type = 'basic'
    file_ext = 'gd_ped'

    MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default='WpedData', readonly=True, set_in_upload=True )

    def __init__( self, **kwd ):
        Html.__init__( self, **kwd )
        self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = False )
        self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = False )

class Individuals( Tabular ):
    """Tabular datatype whose peek is shown with the columns Column, Name, Alias."""
    file_ext = 'gd_indivs'
    def __init__(self, **kwd):
        Tabular.__init__( self, **kwd )
        self.column_names = [ 'Column', 'Name', 'Alias' ]

    def display_peek( self, dataset ):
        return Tabular.make_html_table( self, dataset, column_names=self.column_names )

class DatasetComments( object ):
    """Collect the block of comment lines at the top of a dataset's file.

    Reading stops at the first line that does not start with
    ``comment_string``; the prefix is stripped from each collected line.
    """
    def __init__( self, dataset, comment_string='#' ):
        self.dataset = dataset
        self.comment_string = comment_string
        self.comment_string_len = len(comment_string)
        self._comments = []
        self._read_comments()

    def _read_comments( self ):
        if not self.dataset.has_data():
            return
        try:
            # 'with' closes the handle even when we stop at the first data line
            with open( self.dataset.file_name, 'rU' ) as handle:
                for line in handle:
                    if not line.startswith( self.comment_string ):
                        break
                    self._comments.append( line[self.comment_string_len:] )
        except Exception:
            # Best effort: an unreadable file simply yields the comments
            # gathered so far. Unlike a bare 'except', this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    def __str__( self ):
        return "".join(self._comments)

    @property
    def comments( self ):
        return self._comments

class DatasetCommentMetadata( object ):
    """Decode a dataset's leading comment block as a JSON object.

    ``comment_metadata`` is always a dict; it is empty when the comments are
    missing, are not valid JSON, or decode to something other than an object.
    """
    def __init__( self, dataset, comment_string='#' ):
        self.dataset_comments = DatasetComments( dataset, comment_string )
        self._comment_metadata = {}
        self._decode_dataset_comments()

    def _decode_dataset_comments( self ):
        dataset_comment_string = str( self.dataset_comments )
        try:
            decoded = simplejson.loads( dataset_comment_string )
        except ValueError:
            # simplejson's JSONDecodeError is a ValueError subclass; catching
            # the base class also works where JSONDecodeError is not defined
            return
        # Callers use .copy() and .get() on the result, so anything but a
        # JSON object (e.g. a comment line holding just "5") is ignored.
        if isinstance( decoded, dict ):
            self._comment_metadata = decoded

    @property
    def comment_metadata( self ):
        return self._comment_metadata

class AnnotatedTabular( Tabular ):
    """ Tabular file with optional comment block containing JSON to be imported into metadata """
    MetadataElement( name="comment_metadata", desc="comment metadata", param=metadata.DictParameter, visible=False, readonly=True )

    def set_meta( self, dataset, overwrite = True, **kwd ):
        Tabular.set_meta( self, dataset, overwrite=overwrite, max_data_lines=None, max_guess_type_data_lines=1000, **kwd )
        if dataset.metadata.comment_metadata is None:
            dataset_comment_metadata = DatasetCommentMetadata( dataset )
            dataset.metadata.comment_metadata = dataset_comment_metadata.comment_metadata.copy()
            # NOTE(review): the indentation of this file was lost in the
            # rendering it was recovered from -- confirm that this call
            # belongs inside the 'if' rather than after it.
            self.set_dataset_metadata_from_comments( dataset )

    def set_dataset_metadata_from_comments( self, dataset ):
        """Hook for subclasses; the base class imports nothing further."""
        pass

    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        # super(Tabular, ...) starts the lookup *after* Tabular in the MRO
        super(Tabular, self).set_peek( dataset, line_count=line_count, is_multi_byte=is_multi_byte, WIDTH='unlimited', skipchars=['#'] )

    def display_peek( self, dataset ):
        """Returns formatted html of peek"""
        return Tabular.make_html_table( self, dataset, skipchars=['#'] )

class Fake( AnnotatedTabular ):
    """AnnotatedTabular that fills column, species and dbkey metadata from the comment JSON."""
    MetadataElement( name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="pos", desc="pos column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="ref", desc="ref column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="species", desc="species", default='', no_value='', visible=False, readonly=True )

    def set_dataset_metadata_from_comments( self, dataset ):
        self.set_dataset_column_names_metadata( dataset )
        self.set_dataset_columnParameter_metadata( dataset )
        self.set_dataset_species_metadata( dataset )
        self.set_dataset_dbkey_metadata( dataset )

    def set_dataset_column_names_metadata( self, dataset ):
        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'column_names', None )
        if isinstance( value_from_comment_metadata, list ):
            dataset.metadata.column_names = value_from_comment_metadata[:]

    def set_dataset_columnParameter_metadata( self, dataset ):
        # every ColumnParameter in the metadata spec may be set by a JSON key
        # of the same name
        for name, spec in dataset.metadata.spec.items():
            if isinstance( spec.param, metadata.ColumnParameter ):
                value_from_comment_metadata = dataset.metadata.comment_metadata.get( name, None )
                if value_from_comment_metadata is not None:
                    try:
                        i = int( value_from_comment_metadata )
                    except ( TypeError, ValueError, OverflowError ):
                        # not a usable number: fall back to 0
                        i = 0
                    if 0 <= i <= dataset.metadata.columns:
                        setattr( dataset.metadata, name, i )

    def set_dataset_species_metadata( self, dataset ):
        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'species', None )
        if isinstance( value_from_comment_metadata, basestring ):
            dataset.metadata.species = value_from_comment_metadata

    def set_dataset_dbkey_metadata( self, dataset ):
        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'dbkey', '?' )
        if isinstance( value_from_comment_metadata, basestring ):
            dataset.metadata.dbkey = value_from_comment_metadata

class GDSnp( Fake ):
    """ Webb's SNP file format """
    file_ext = 'gd_snp'

    MetadataElement( name="individual_names", desc="individual names", visible=False, readonly=True )
    MetadataElement( name="individual_columns", desc="individual columns", visible=False, readonly=True )

    def set_dataset_metadata_from_comments( self, dataset ):
        Fake.set_dataset_metadata_from_comments( self, dataset )
        self.set_dataset_individual_metadata( dataset )

    def set_dataset_individual_metadata( self, dataset ):
        """Import the 'individuals' list of [name, column] pairs from the comment JSON."""
        individual_list = dataset.metadata.comment_metadata.get( 'individuals', None )
        if not isinstance( individual_list, list ):
            individual_list = []

        individual_names = []
        individual_columns = []

        for individual in individual_list:
            # malformed entries are skipped
            if not isinstance( individual, list ) or len( individual ) != 2:
                continue
            name, col = individual
            if not isinstance( name, basestring ):
                name = ''
            try:
                c = int( col )
            except ( TypeError, ValueError, OverflowError ):
                # 0 fails the range test below, so the entry is dropped
                c = 0
            if 0 < c <= dataset.metadata.columns:
                individual_names.append( name )
                individual_columns.append( c )

        if individual_names:
            dataset.metadata.individual_names = individual_names[:]
            dataset.metadata.individual_columns = individual_columns[:]

class GDSap( Fake ):
    """ Webb's SAP file format """
    file_ext = 'gd_sap'

    MetadataElement( name="kegg_gene", desc="KEGG gene code column", param=metadata.ColumnParameter, default=0 )
    MetadataElement( name="kegg_path", desc="KEGG pathway code/name column", param=metadata.ColumnParameter, default=0 )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/map_ensembl_transcripts.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,42 @@ +<tool id="gd_new_oscar" name="Get Pathways" version="1.0.0"> + <description>: Look up KEGG pathways for given Ensembl transcripts</description> + + <command interpreter="python"> + rtrnKEGGpthwfENSEMBLTc.py + "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.oscar.loc" + "--species=${input.metadata.dbkey}" + "--input=${input}" + "--posENSEMBLclmn=${ensembl_col}" + "--output=${output}" + </command> + + <inputs> + <param name="input" type="data" format="tabular" label="Table" /> + <param name="ensembl_col" type="data_column" data_ref="input" label="Column with ENSEMBL transcript code" /> + </inputs> + + <outputs> + <data name="output" format="tabular" /> + </outputs> + + <!-- + <tests> + <test> + <param name="input" value="test_in/ensembl.tabular" ftype="tabular"> + <metadata name="dbkey" value="canFam2" /> + </param> + <param name="ensembl_col" value="1" /> + + <output name="output" file="test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular" /> + </test> + </tests> + --> + + <help> + +**What it does** + +Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes. + + </help> +</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mkpthwpng.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,92 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       mkpthwpng.py
#
#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.
"""Draw a KEGG pathway image with the genes of an input table highlighted."""

import argparse
import os
import subprocess
import sys

import mechanize


def rtnHTMLformat(tmpddGenrcgenPresent, sppPrefx, pthwcod, ouPthwpng):
    """Submit genes to the KEGG pathway-mapping form and fetch the PNG.

    Parameters:
        tmpddGenrcgenPresent: iterable of "gene<TAB>colour" strings; they are
            joined with newlines and posted in the form's "unclassified" field.
        sppPrefx: organism prefix posted in the form's "org_name" field.
        pthwcod: pathway code (e.g. cfa00230) used to build the pathway URLs.
        ouPthwpng: path of the PNG file that wget writes.

    Returns:
        A status message saying whether the image was produced.

    Raises:
        IndexError: if the KEGG responses do not have the expected shape
            (no show_pathway link, or fewer than two forms on the page).
    """
    gene_spec = '\n'.join(tmpddGenrcgenPresent)  # e.g. "ALDH2 color \nALDH3A1 color"
    request = mechanize.Request("http://www.genome.jp/kegg/tool/map_pathway2.html")
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form["unclassified"] = gene_spec
    form["org_name"] = [sppPrefx]
    response2 = mechanize.urlopen(form.click())

    # The result page links to the coloured pathways through a session code.
    after_link = str(response2.read()).split('href="/kegg-bin/show_pathway?')[1]
    code = after_link.split('/')[0]
    request = mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args" % (code, pthwcod))
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    # Deliberately outside the try: a page without a second form is an
    # unexpected response and should fail loudly, as it always did.
    marked_form = forms[1]

    status = ' NOT '
    try:
        imgf = str(marked_form).split('/mark_pathway')[1].split('/')[0]
        url = "http://www.genome.jp/tmp/mark_pathway%s/%s.png" % (imgf, pthwcod)
        # Argument list instead of a shell string: the output path and the
        # pathway code come from the command line and must not be re-parsed
        # by a shell.
        if subprocess.call(["wget", "--quiet", url, "-O", ouPthwpng]) == 0:
            status = ' '
    except (IndexError, OSError):
        # IndexError: no '/mark_pathway' marker in the form.
        # OSError: wget could not be started.
        pass
    return 'A pathway image was%ssuccefully produced...' % status


def _genes_in_pathway(rows, gene_col, path_col, pathway):
    """Return the set of "gene<TAB>red" entries for genes mapped to *pathway*.

    *rows* are tab-separated data lines; *gene_col* and *path_col* are 0-based
    column indexes. The pathway column holds '.'-separated "code=name" items.
    A gene cell naming several genes joined by '.' is expanded so that every
    gene gets the pathways of the joined entry.
    """
    pathways_by_gene = {}
    for row in rows:
        if not row.strip():
            continue
        fields = row.split('\t')
        pathways_by_gene[fields[gene_col]] = set(
            item.split('=')[0] for item in fields[path_col].split('.'))

    # Snapshot the joined names first: the dict is modified while expanding.
    for joined in [gene for gene in pathways_by_gene if '.' in gene]:
        shared = pathways_by_gene.pop(joined)
        for gene in joined.split('.'):
            pathways_by_gene[gene] = shared

    return set('\t'.join([gene, 'red'])
               for gene, pathways in pathways_by_gene.items()
               if pathway in pathways)


def main():
    """Parse the command line, collect the pathway's genes and fetch the image."""
    parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.')
    parser.add_argument('--input', metavar='input TXT file', type=str, required=True, help='the input file with the table in txt format')
    parser.add_argument('--output', metavar='output PNG image', type=str, required=True, help='the output image file in png format')
    parser.add_argument('--KEGGpath', metavar='KEGG pathway code (i.e. cfa00230)', type=str, required=True, help='the code of the pathway of interest')
    parser.add_argument('--posKEGGclmn', metavar='column number', type=int, required=True, help='the column with the KEGG pathway code/name')
    parser.add_argument('--KEGGgeneposcolmn', metavar='column number', type=int, required=True, help='column with the KEGG gene code')
    fulargs = parser.parse_args()

    KEGGpathw = fulargs.KEGGpath
    sppPrefx = KEGGpathw[:3]
    # The command line uses 1-based column numbers.
    posKEGGclmn = fulargs.posKEGGclmn - 1
    Kgeneposcolmn = fulargs.KEGGgeneposcolmn - 1

    with open(fulargs.input) as table:
        rows = table.read().splitlines()[1:]  # the first line is skipped
    tmpddGenrcgenPresent = _genes_in_pathway(rows, Kgeneposcolmn, posKEGGclmn, KEGGpathw)

    rtnHTMLformat(tmpddGenrcgenPresent, sppPrefx, KEGGpathw, fulargs.output)
    return 0


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/modify_snp_table.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +import sys +import subprocess +from Population import Population + +################################################################################ + +if len(sys.argv) < 9: + print >> sys.stderr, "Usage" + sys.exit(1) + +input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8] +individual_metadata = sys.argv[8:] + +p_total = Population() +p_total.from_tag_list(individual_metadata) + +p1 = Population() +p1.from_population_file(p1_input) + +if not p_total.is_superset(p1): + print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' + sys.exit(1) + +################################################################################ + +prog = 'pop' + +args = [] +args.append(prog) +args.append(input) +args.append(lo) +args.append(hi) +args.append(lo_ind) +args.append(lo_ind_qual) + +columns = p1.column_list() + +for column in sorted(columns): + args.append(column) + +fh = open(output, 'w') + +#print "args:", ' '.join(args) +p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) +rc = p.wait() +fh.close() + +sys.exit(0) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pathway_image.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,48 @@ +<tool id="gd_pathway_image" name="Pathway Image" version="1.0.0"> + <description>: Draw a KEGG pathway, highlighting specified gene modules</description> + + <command interpreter="python"> + mkpthwpng.py + "--input=${input}" + "--output=${output}" + "--KEGGpath=${pathway}" + "--posKEGGclmn=${input.metadata.kegg_path}" + "--KEGGgeneposcolmn=${input.metadata.kegg_gene}" + </command> + + <inputs> + <param name="input" type="data" format="gd_sap" label="Table"> + <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata. Click the pencil icon in the history item to edit/save the metadata attributes" /> + </param> + <param name="pathway" type="select"> + <options from_file="gd.pathways.txt"> + <column name="value" index="1"/> + <column name="name" index="2"/> + <filter type="data_meta" ref="input" key="dbkey" column="0" separator="\t" /> + </options> + </param> + </inputs> + + <outputs> + <data name="output" format="png" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" /> + <param name="pathway" value="cfa05214" /> + <output name="output" file="test_out/pathway_image/pathway_image.png" compare="sim_size" delta = "10000" /> + </test> + </tests> + + <help> + +**What it does** + +This tool produces an image of an input KEGG pathway, highlighting the +modules representing genes in an input list. NOTE: a given gene can +be assigned to multiple modules, and different genes can be assigned to +the same module. + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pca.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,258 @@ +#!/usr/bin/env python + +import errno +import os +import shutil +import subprocess +import sys +from BeautifulSoup import BeautifulSoup +import gd_composite + +################################################################################ + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError, e: + if e.errno <> errno.EEXIST: + raise + +################################################################################ + +def run_program(prog, args, stdout_file=None): + #print "args: ", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + + if stdout_file is not None: + with open(stdout_file, 'w') as ofh: + print >> ofh, stdoutdata + + if rc != 0: + print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + +################################################################################ + +def do_ped2geno(input, output): + lines = [] + with open(input) as fh: + for line in fh: + line = line.rstrip('\r\n') + lines.append(line.split()) + + pair_map = { + '0':{ '0':'9', '1':'9', '2':'9' }, + '1':{ '0':'1', '1':'2', '2':'1' }, + '2':{ '0':'1', '1':'1', '2':'0' } + } + with open(output, 'w') as ofh: + for a_idx in xrange(6, len(lines[0]), 2): + b_idx = a_idx + 1 + print >> ofh, ''.join(map(lambda line: pair_map[line[a_idx]][line[b_idx]], lines)) + +def do_map2snp(input, output): + with open(output, 'w') as ofh: + with open(input) as fh: + for line in fh: + elems = line.split() + print >> ofh, ' {0} 11 0.002 2000 A T'.format(elems[1]) + +def make_ind_file(ind_file, input): + pops = [] + + ofh = open(ind_file, 'w') + + with open(input) as fh: + soup = BeautifulSoup(fh) + misc = soup.find('div', {'id': 'gd_misc'}) + populations = misc('ul')[0] + + i = 
0 + for entry in populations: + if i % 2 == 1: + population_name = entry.contents[0].encode('utf8').strip().replace(' ', '_') + pops.append(population_name) + individuals = entry.ol('li') + for individual in individuals: + individual_name = individual.string.encode('utf8').strip() + print >> ofh, individual_name, 'M', population_name + i += 1 + + ofh.close() + return pops + +def make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file): + with open(par_file, 'w') as fh: + print >> fh, 'genotypename: {0}'.format(geno_file) + print >> fh, 'snpname: {0}'.format(snp_file) + print >> fh, 'indivname: {0}'.format(ind_file) + print >> fh, 'evecoutname: {0}'.format(evec_file) + print >> fh, 'evaloutname: {0}'.format(eval_file) + print >> fh, 'altnormstyle: NO' + print >> fh, 'numoutevec: 2' + +def do_smartpca(par_file): + prog = 'smartpca' + + args = [ prog ] + args.append('-p') + args.append(par_file) + + #print "args: ", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + + if rc != 0: + print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + + stats = [] + + save_line = False + for line in stdoutdata.split('\n'): + if line.startswith(('## Average divergence', '## Anova statistics', '## Statistical significance')): + stats.append('') + save_line = True + if line.strip() == '': + save_line = False + if save_line: + stats.append(line) + + return '\n'.join(stats[1:]) + +def do_ploteig(evec_file, population_names): + prog = 'gd_ploteig' + + args = [ prog ] + args.append('-i') + args.append(evec_file) + args.append('-c') + args.append('1:2') + args.append('-p') + args.append(':'.join(population_names)) + args.append('-x') + + run_program(None, args) + +def do_eval2pct(eval_file, explained_file): + prog = 'eval2pct' + + args = [ prog ] + args.append(eval_file) + + with 
open(explained_file, 'w') as ofh: + #print "args:", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + + if rc != 0: + print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + +def do_coords2admix(coords_file): + prog = 'coords2admix' + + args = [ prog ] + args.append(coords_file) + + with open('fake', 'w') as ofh: + #print "args:", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + + if rc != 0: + print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + + shutil.copy2('fake', coords_file) + +################################################################################ + +if len(sys.argv) != 5: + print "usage" + sys.exit(1) + +input, input_files_path, output, output_files_path = sys.argv[1:5] + +mkdir_p(output_files_path) + +ped_file = os.path.join(input_files_path, 'admix.ped') +geno_file = os.path.join(output_files_path, 'admix.geno') +do_ped2geno(ped_file, geno_file) + +map_file = os.path.join(input_files_path, 'admix.map') +snp_file = os.path.join(output_files_path, 'admix.snp') +do_map2snp(map_file, snp_file) + +ind_file = os.path.join(output_files_path, 'admix.ind') +population_names = make_ind_file(ind_file, input) + +par_file = os.path.join(output_files_path, 'par.admix') +evec_file = os.path.join(output_files_path, 'coordinates.txt') +eval_file = os.path.join(output_files_path, 'admix.eval') +make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file) + +smartpca_stats = do_smartpca(par_file) + +do_ploteig(evec_file, population_names) +plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names)) +output_plot_file = os.path.join(output_files_path, 'PCA.pdf') 
+shutil.copy2(plot_file, output_plot_file) +os.unlink(plot_file) + +do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt')) +os.unlink(eval_file) + +do_coords2admix(evec_file) + +################################################################################ + +info_page = gd_composite.InfoPage() +info_page.set_title('PCA Galaxy Composite Dataset') + +display_file = gd_composite.DisplayFile() +display_value = gd_composite.DisplayValue() + +out_pdf = gd_composite.Parameter(name='PCA.pdf', value='PCA.pdf', display_type=display_file) +out_evec = gd_composite.Parameter(name='coordinates.txt', value='coordinates.txt', display_type=display_file) +out_explained = gd_composite.Parameter(name='explained.txt', value='explained.txt', display_type=display_file) + +evec_prefix = 'coordinates.txt.1:2.{0}'.format(':'.join(population_names)) +ps_file = '{0}.ps'.format(evec_prefix) +xtxt_file = '{0}.xtxt'.format(evec_prefix) + +os.unlink(os.path.join(output_files_path, ps_file)) +os.unlink(os.path.join(output_files_path, xtxt_file)) + +info_page.add_output_parameter(out_pdf) +info_page.add_output_parameter(out_evec) +info_page.add_output_parameter(out_explained) + +in_admix = gd_composite.Parameter(name='par.admix', value='par.admix', display_type=display_file) +in_geno = gd_composite.Parameter(name='admix.geno', value='admix.geno', display_type=display_file) +in_snp = gd_composite.Parameter(name='admix.snp', value='admix.snp', display_type=display_file) +in_ind = gd_composite.Parameter(name='admix.ind', value='admix.ind', display_type=display_file) + +info_page.add_input_parameter(in_admix) +info_page.add_input_parameter(in_geno) +info_page.add_input_parameter(in_snp) +info_page.add_input_parameter(in_ind) + +misc_stats = gd_composite.Parameter(description='Stats<p/><pre>\n{0}\n</pre>'.format(smartpca_stats), display_type=display_value) + +info_page.add_misc(misc_stats) + +with open (output, 'w') as ofh: + print >> ofh, info_page.render() + +sys.exit(0) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pca.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,116 @@ +<tool id="gd_pca" name="PCA" version="1.0.0"> + <description>: Principal Component Analysis of genotype data</description> + + <command interpreter="python"> + pca.py "$input" "$input.extra_files_path" "$output" "$output.files_path" + </command> + + <inputs> + <param name="input" type="data" format="gd_ped" label="Dataset" /> + </inputs> + + <outputs> + <data name="output" format="html" /> + </outputs> + + <!-- + <tests> + <test> + <param name="input" value="fake" ftype="gd_ped" > + <metadata name="base_name" value="admix" /> + <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" /> + <composite_data value="test_out/prepare_population_structure/admix.ped" /> + <composite_data value="test_out/prepare_population_structure/admix.map" /> + <edit_attributes type="name" value="fake" /> + </param> + + <output name="output" file="test_out/pca/pca.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="admix.geno" value="test_out/pca/admix.geno" /> + <extra_files type="file" name="admix.gd_indivs" value="test_out/pca/admix.gd_indivs" /> + <extra_files type="file" name="admix.gd_snp" value="test_out/pca/admix.gd_snp" /> + <extra_files type="file" name="coordinates.txt" value="test_out/pca/coordinates.txt" /> + <extra_files type="file" name="explained.txt" value="test_out/pca/explained.txt" /> + <extra_files type="file" name="par.admix" value="test_out/pca/par.admix" compare="diff" lines_diff="10" /> + <extra_files type="file" name="PCA.pdf" value="test_out/pca/PCA.pdf" compare="sim_size" delta = "1000" /> + </output> + + </test> + </tests> + --> + + <help> + +**Dataset formats** + +The input dataset is in gd_ped_ format. +The output dataset is html_ with links to a pdf for a graphical output and +text files. (`Dataset missing?`_) + +.. _gd_ped: ./static/formatHelp.html#gd_ped +.. 
_html: ./static/formatHelp.html#html +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user selects a gd_ped dataset generated by the Prepare Input tool. +The PCA tool runs a +Principal Component Analysis on the input genotype data and constructs +a plot of the top two principal components. It also reports the +following estimates of the statistical significance of the analysis. + +1. Average divergence between each pair of populations. Specifically, +from the covariance matrix X whose eigenvectors were computed, we can +compute a "distance", d, for each pair of individuals (i,j): d(i,j) = +X(i,i) + X(j,j) - 2X(i,j). For each pair of populations (a,b) now +define an average distance: D(a,b) = \sum d(i,j) (in pop a, in pop b) +/ (\|pop a\| * \|pop b\|). We then normalize D so that the diagonal +has mean 1 and report it. + +2. Anova statistics for population differences along each +eigenvector. For each eigenvector, a P-value for statistical +significance of differences between each pair of populations along +that eigenvector is printed. +++ is used to highlight P-values less +than 1e-06. \*\*\* is used to highlight P-values between 1e-06 and +1e-03. If there are more than 2 populations, then an overall P-value +is also printed for that eigenvector, as are the populations with +minimum (minv) and maximum (maxv) eigenvector coordinate. [If there is +only 1 population, no Anova statistics are printed.] + +3. Statistical significance of differences between populations. For +each pair of populations, the above Anova statistics are summed across +eigenvectors. The result is approximately chisq with d.o.f. equal to +the number of eigenvectors. The chisq statistic and its p-value are +printed. [If there is only 1 population, no statistics are printed.] + +We post-process the output of the PCA tool to estimate "admixture +fractions".
For this, we take three populations at a time and +determine each one's average point in the PCA plot (by separately +averaging first and second coordinates). For each combination of two +center points, modeling two ancestral populations, we try to model the +third central point as having a certain fraction, r, of its SNP +genotypes from the second ancestral population and the remainder from +the first ancestral population, where we estimate r. The output file +"coordinates.txt" then contains pairs of lines like + +projection along chord Population1 -> Population2 + Population3: 0.12345 + +where the number (in this case 0.12345) is the estimate of r. +Computations with simulated data suggest that the true r is +systematically underestimated, perhaps giving roughly 0.6 times r. + +----- + +**Acknowledgments** + +We use the programs "smartpca" and "ploteig" downloaded from + +http://genepath.med.harvard.edu/~reich/Software.htm + +and described in the paper "Population structure and eigenanalysis" +by Nick Patterson, Alkes L. Price, and David Reich, PLoS Genetics, 2 (2006), e190. + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phylogenetic_tree.py Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +import os +import errno +import sys +import subprocess +import shutil +from Population import Population +import gd_composite + +################################################################################ + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError, e: + if e.errno <> errno.EEXIST: + raise + +################################################################################ + +if len(sys.argv) < 11: + print >> sys.stderr, "Usage" + sys.exit(1) + +input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10] + +individual_metadata = sys.argv[10:] + +# note: TEST THIS +if dbkey in ['', '?', 'None']: + dbkey = 'none' + +p_total = Population() +p_total.from_tag_list(individual_metadata) + + +################################################################################ + +mkdir_p(extra_files_path) + +################################################################################ + +def run_program(prog, args, ofh): + #print "args: ", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + ofh.close() + + if rc != 0: + #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + +################################################################################ + +phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip') +newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick') +ps_outfile = 'tree.ps' +pdf_outfile = os.path.join(extra_files_path, 'tree.pdf') + +################################################################################ + +informative_snp_file = os.path.join(extra_files_path, 
'informative_snps.txt') +mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt') + +prog = 'dist_mat' + +args = [] +args.append(prog) +args.append(input) +args.append(minimum_coverage) +args.append(minimum_quality) +args.append(dbkey) +args.append(data_source) +args.append(informative_snp_file) +args.append(mega_distance_matrix_file) + +if p1_input == "all_individuals": + tags = p_total.tag_list() +else: + p1 = Population() + p1.from_population_file(p1_input) + if not p_total.is_superset(p1): + print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' + sys.exit(1) + tags = p1.tag_list() + +for tag in tags: + args.append(tag) + +fh = open(phylip_outfile, 'w') +run_program(None, args, fh) + +################################################################################ + +prog = 'quicktree' + +args = [] +args.append(prog) +args.append('-in') +args.append('m') +args.append('-out') +args.append('t') +args.append(phylip_outfile) + +fh = open(newick_outfile, 'w') +run_program(None, args, fh) + +################################################################################ + +prog = 'draw_tree' + +args = [] +args.append(prog) +if draw_tree_options: + args.append(draw_tree_options) +args.append(newick_outfile) + +fh = open(ps_outfile, 'w') +run_program(None, args, fh) + +################################################################################ + +prog = 'ps2pdf' + +args = [] +args.append(prog) +args.append('-dPDFSETTINGS=/prepress') +args.append(ps_outfile) +args.append('-') + +fh = open(pdf_outfile, 'w') +run_program(None, args, fh) + +shutil.copyfile(pdf_outfile, output) + +################################################################################ + +info_page = gd_composite.InfoPage() +info_page.set_title('Phylogenetic tree Galaxy Composite Dataset') + +display_file = gd_composite.DisplayFile() +display_value = gd_composite.DisplayValue() + +out_pdf = 
gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file) +out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file) +out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file) +out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file) +out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file) + +info_page.add_output_parameter(out_pdf) +info_page.add_output_parameter(out_newick) +info_page.add_output_parameter(out_phylip) +info_page.add_output_parameter(out_mega) +info_page.add_output_parameter(out_snps) + +in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value) +in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value) + +include_ref_value = 'no' +if dbkey != 'none': + include_ref_value = 'yes' + +in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value) + +if data_source == '0': + data_source_value = 'sequence coverage' +elif data_source == '1': + data_source_value = 'estimated genotype' + +in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) + +branch_type_value = 'square' +if 'd' in draw_tree_options: + branch_type_value = 'diagonal' + +in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value) + +branch_scale_value = 'yes' +if 's' in draw_tree_options: + branch_scale_value = 'no' + +in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value) + +branch_length_value = 'yes' +if 
'b' in draw_tree_options: + branch_length_value = 'no' + +in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value) + +tree_layout_value = 'horizontal' +if 'v' in draw_tree_options: + tree_layout_value = 'vertical' + +in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value) + +info_page.add_input_parameter(in_min_cov) +info_page.add_input_parameter(in_min_qual) +info_page.add_input_parameter(in_include_ref) +info_page.add_input_parameter(in_data_source) +info_page.add_input_parameter(in_branch_type) +info_page.add_input_parameter(in_branch_scale) +info_page.add_input_parameter(in_branch_length) +info_page.add_input_parameter(in_tree_layout) + +misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) + +info_page.add_misc(misc_individuals) + + +with open(output, 'w') as ofh: + print >> ofh, info_page.render() + +################################################################################ + +sys.exit(0) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phylogenetic_tree.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,192 @@ +<tool id="gd_phylogenetic_tree" name="Phylogenetic Tree" version="1.0.0"> + <description>: Show genetic relationships among individuals</description> + + <command interpreter="python"> + phylogenetic_tree.py "$input" + #if $individuals.choice == '0' + "all_individuals" + #else if $individuals.choice == '1' + "$p1_input" + #end if + "$output" "$output.files_path" "$minimum_coverage" "$minimum_quality" + #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0') + "none" + #else + "$input.metadata.dbkey" + #end if + "$data_source" + #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style]) + #if $draw_tree_options == '' + "" + #else + "-$draw_tree_options" + #end if + #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual_name) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="SNP dataset" /> + + <conditional name="individuals"> + <param name="choice" type="select" label="Individuals"> + <option value="0" selected="true">All individuals</option> + <option value="1">Individuals in a population</option> + </param> + <when value="0" /> + <when value="1"> + <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" /> + </when> + </conditional> + + <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum coverage" /> + + <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum quality" help="Note: minimum coverage and minimum quality cannot both be 0" /> + + <param name="include_reference" type="select" format="integer" label="Include reference 
sequence"> + <option value="1" selected="true">Yes</option> + <option value="0">No</option> + </param> + + <param name="data_source" type="select" format="integer" label="Data source"> + <option value="0" selected="true">sequence coverage</option> + <option value="1">estimated genotype</option> + </param> + + <param name="branch_style" type="select" display="radio"> + <label>Branch type</label> + <option value="" selected="true">square</option> + <option value="d">diagonal</option> + </param> + + <param name="scale_style" type="select" display="radio"> + <label>Draw branches to scale</label> + <option value="" selected="true">yes</option> + <option value="s">no</option> + </param> + + <param name="length_style" type="select" display="radio"> + <label>Show branch lengths</label> + <option value="" selected="true">yes</option> + <option value="b">no</option> + </param> + + <param name="layout_style" type="select" display="radio"> + <label>Tree layout</label> + <option value="" selected="true">horizontal</option> + <option value="v">vertical</option> + </param> + </inputs> + + <outputs> + <data name="output" format="html" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="choice" value="0" /> + <param name="minimum_coverage" value="3" /> + <param name="minimum_quality" value="30" /> + <param name="data_source" value="0" /> + <param name="branch_style" value="" /> + <param name="scale_style" value="" /> + <param name="length_style" value="" /> + <param name="layout_style" value="" /> + <output name="output" file="test_out/phylogenetic_tree/phylogenetic_tree.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="distance_matrix.phylip" value="test_out/phylogenetic_tree/distance_matrix.phylip" /> + <extra_files type="file" name="informative_snps.txt" value="test_out/phylogenetic_tree/informative_snps.txt" /> + <extra_files type="file" name="mega_distance_matrix.txt" 
value="test_out/phylogenetic_tree/mega_distance_matrix.txt" /> + <extra_files type="file" name="phylogenetic_tree.newick" value="test_out/phylogenetic_tree/phylogenetic_tree.newick" /> + <extra_files type="file" name="tree.pdf" value="test_out/phylogenetic_tree/tree.pdf" compare="sim_size" delta = "1000"/> + </output> + </test> + </tests> + + <help> + +**Dataset formats** + +The input dataset is in gd_snp_ format. +The output is a composite dataset, containing the tree in both text (Newick_) +and PostScript formats, as well as supplemental text information. +(`Dataset missing?`_) + +.. _gd_snp: ./static/formatHelp.html#gd_snp +.. _Newick: http://evolution.genetics.washington.edu/phylip/newicktree.html +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +This tool uses a gd_snp dataset to determine a kind of "genetic distance" +between each pair of individuals. That information is used to +produce a tree-shaped figure that depicts how the individuals are related, +both as text files and as a diagram. +The text files include a common tree format, Newick, as well as distance +matrices and counts of informative SNPs for each pairwise comparison. +The informative SNPs can be used as a guide to how reliable the tree is. + +The input parameters are: + +SNP dataset + A table of SNPs for various individuals, in gd_snp format. + +Individuals + By default all individuals are included in the analysis, but this can + optionally be restricted to a subset that has been defined using the + Specify Individuals tool. + +Minimum coverage + For each pair of individuals, the tool looks for informative SNPs, i.e., + where the sequence data for both individuals is adequate according to + some criterion. Specifying, say, 7 for this option instructs the tool + to consider only SNPs with coverage at least 7 in both individuals + when estimating their "genetic distance". 
+ +Minimum quality + Specifying, say, 37 for this option instructs the tool to consider + only SNPs with SAMtools quality value at least 37 in both individuals + when estimating their "genetic distance". + +Include reference sequence + For gd_snp datasets containing columns for a reference sequence, the + user can ask that the reference be indicated in the tree, to help with + rooting it. If the dataset has no reference columns, this option has + no effect. + +Data source + The genetic distance between two individuals at a given SNP can + be estimated two ways. One method is to use the absolute value of the + difference in the frequency of the first allele (or equivalently, the + second allele). For instance, if the first individual has 5 reads of + each allele and the second individual has respectively 3 and 6 reads, + then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that + SNP. The other approach is to use the SAMtools genotypes to estimate + the difference in the number of occurrences of the first allele. + For instance, if the two genotypes are 2 and 1, i.e., the individuals + are estimated to have respectively 2 and 1 occurrences of the first + allele at this location, then the distance is 1 (the absolute value + of the difference of the two numbers). + +Output options + The final four options apply mostly to the graphical drawing of the + tree, except that the branch lengths are also added to the Newick text + file. + +----- + +**Acknowledgments** + +To convert the distance matrix to a Newick-formatted tree, we use the +QuickTree program from +http://www.sanger.ac.uk/resources/software/quicktree/ . + +To make the diagram we use draw_tree, available at +http://compgen.bscb.cornell.edu/phast/ . + + </help> +</tool>
#!/usr/bin/env python
# diff: --- /dev/null  +++ b/population_structure.py  (Fri Sep 28 11:35:56 2012 -0400)
"""Estimate ancestry proportions with admixture and build a composite report.

Usage:
    population_structure.py INPUT_HTML INPUT_PED OUTPUT_HTML EXTRA_FILES_PATH POPULATIONS

The script runs ``admixture`` on INPUT_PED for POPULATIONS populations,
copies the resulting ``.Q`` file to EXTRA_FILES_PATH/numeric.txt, asks R
(population_structure.r, located next to this script) to plot it into
EXTRA_FILES_PATH/graphical.pdf, and finally writes an HTML info page to
OUTPUT_HTML.  The population list shown on that page is taken from the
``gd_misc`` div of INPUT_HTML.
"""

import errno
import os
import shutil
import subprocess
import sys
from BeautifulSoup import BeautifulSoup
import gd_composite

################################################################################

def die(message):
    """Print *message* on stderr and terminate with exit status 1."""
    print >> sys.stderr, message
    sys.exit(1)

def run_admixture(ped_file, populations):
    """Run ``admixture PED_FILE POPULATIONS`` and return its exit status.

    admixture's stdout is discarded; its stderr is passed through to ours.
    The program writes its ``.P`` and ``.Q`` result files into the current
    working directory.
    """
    # Use the ped_file argument; the original code silently read the
    # module-level input_ped_file instead of its own parameter.
    args = ['admixture', ped_file, populations]

    #print "args:", ' '.join(args)
    with open(os.devnull, 'w') as ofh:
        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=sys.stderr)
        return p.wait()

def run_r(input_file, output_file, populations):
    """Feed population_structure.r to R and return R's exit status.

    The R script is read from the directory containing this file and is
    passed to R on stdin; *input_file*, *output_file* and *populations*
    are handed to it as trailing command-line arguments.  R's stdout is
    discarded and its stderr is inherited.
    """
    args = ['R', '--vanilla', '--quiet', '--args', input_file, output_file, populations]

    script_dir = os.path.dirname(os.path.realpath(__file__))
    r_script_file = os.path.join(script_dir, 'population_structure.r')

    with open(r_script_file) as ifh:
        with open(os.devnull, 'w') as ofh:
            p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
            return p.wait()

def mkdir_p(path):
    """Create *path* and any missing parents; an existing path is not an error."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

def get_populations(input):
    """Return the population list of a composite HTML page as an HTML snippet.

    The first ``<ul>`` inside the ``<div id="gd_misc">`` element of the file
    named by *input* is returned, prefixed with a ``Populations`` heading
    line.  The script exits with an error message when that element is
    missing.
    """
    with open(input) as fh:
        soup = BeautifulSoup(fh)
        misc = soup.find('div', {'id': 'gd_misc'})

    if misc is None:
        die('Cannot find the gd_misc section in {0}'.format(input))

    lists = misc('ul')
    if not lists:
        die('Cannot find a population list in the gd_misc section of {0}'.format(input))

    return 'Populations\n{0}'.format(lists[0])

################################################################################

if len(sys.argv) != 6:
    die("Usage")

input_html_file, input_ped_file, output_file, extra_files_path, populations = sys.argv[1:6]
populations_html = get_populations(input_html_file)

rc = run_admixture(input_ped_file, populations)
if rc != 0:
    # Without this check the failure only surfaced later as a missing .Q file.
    die('admixture failed: rc={0}'.format(rc))

ped_base = os.path.basename(input_ped_file)
if ped_base.endswith('.ped'):
    ped_base = ped_base[:-4]

# admixture names its outputs <ped basename>.<K>.P and <ped basename>.<K>.Q
p_file = '%s.%s.P' % (ped_base, populations)
q_file = '%s.%s.Q' % (ped_base, populations)

mkdir_p(extra_files_path)
numeric_output_file = os.path.join(extra_files_path, 'numeric.txt')
shutil.copy2(q_file, numeric_output_file)
os.remove(p_file)
os.remove(q_file)

graphical_output_file = os.path.join(extra_files_path, 'graphical.pdf')
rc = run_r(numeric_output_file, graphical_output_file, populations)
if rc != 0:
    die('R failed: rc={0}'.format(rc))

################################################################################

info_page = gd_composite.InfoPage()
info_page.set_title('Population structure Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='graphical.pdf', value='graphical.pdf', display_type=display_file)
out_txt = gd_composite.Parameter(name='numeric.txt', value='numeric.txt', display_type=display_file)

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_txt)

in_pops = gd_composite.Parameter(description='Number of populations', value=populations, display_type=display_value)

info_page.add_input_parameter(in_pops)

misc_pops = gd_composite.Parameter(description=populations_html, display_type=display_value)

info_page.add_misc(misc_pops)


with open(output_file, 'w') as ofh:
    print >> ofh, info_page.render()


sys.exit(0)
# diff: --- /dev/null  +++ b/population_structure.r  (Fri Sep 28 11:35:56 2012 -0400)
#
# Draw a stacked bar plot of ancestry proportions.
#
# Trailing command-line arguments:
#   1. q_file       table of per-individual ancestry proportions
#   2. output_file  PDF file to create
#   3. populations  number of ancestral populations (one colour each)

library(RColorBrewer)

args = commandArgs(trailingOnly=TRUE)
q_file = args[[1]]
output_file = args[[2]]
# commandArgs() returns character strings.  Convert to a number so the range
# test below is numeric; compared as strings, "10" >= 3 is FALSE.
populations = as.integer(args[[3]])

tbl <- read.table(q_file)

# brewer.pal() is only called for 3 to 12 colours; other counts use rainbow().
if ( populations >= 3 && populations <= 12 ) {
    colors = brewer.pal(populations, 'Paired')
} else {
    colors = rainbow(populations)
}

pdf(file=output_file, onefile=TRUE, width=7, height=3)
barplot(t(as.matrix(tbl)), col=colors, xlab="Individual #", ylab="Ancestry", border=NA)

dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/population_structure.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,72 @@ +<tool id="gd_population_structure" name="Ancestry" version="1.0.0"> + <description>: Characterize ancestries w.r.t. inferred ancestral populations</description> + + <command interpreter="python"> + population_structure.py "$input" "${input.extra_files_path}/admix.ped" "$output" "$output.files_path" "$populations" + </command> + + <inputs> + <param name="input" type="data" format="gd_ped" label="Dataset" /> + <param name="populations" type="integer" min="1" value="2" label="Number of populations" /> + </inputs> + + <outputs> + <data name="output" format="html" /> + </outputs> + + <!-- + <tests> + <test> + <param name="input" value="fake" ftype="gd_ped" > + <metadata name="base_name" value="admix" /> + <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" /> + <composite_data value="test_out/prepare_population_structure/admix.ped" /> + <composite_data value="test_out/prepare_population_structure/admix.map" /> + <edit_attributes type="name" value="fake" /> + </param> + <param name="populations" value="2" /> + + <output name="output" file="test_out/population_structure/population_structure.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="numeric.txt" value="test_out/population_structure/numeric.txt" /> + <extra_files type="file" name="graphical.pdf" value="test_out/population_structure/graphical.pdf" compare="sim_size" delta="1000" /> + </output> + </test> + </tests> + --> + + + <help> + +**Dataset formats** + +The input dataset is in gd_ped_ format. +The output dataset is a composite dataset containing a graph and text. +(`Dataset missing?`_) + +.. _gd_ped: ./static/formatHelp.html#gd_ped +.. 
_Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The user selects a gd_ped dataset generated by the Prepare Input tool, +and specifies a number, K, of ancestral +populations. The tool estimates the proportion of each individual's ancestry +coming from each ancestral population. The proportions are shown both as +numbers and graphically. + +----- + +**Acknowledgments** + +We use the program "Admixture", downloaded from + +http://www.genetics.ucla.edu/software/admixture/ + +and described in the paper "Fast model-based estimation of ancestry in +unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange, +Genome Research 19 (2009), pp. 1655-1664. + + </help> +</tool>
#!/usr/bin/env python
# diff: --- /dev/null  +++ b/prepare_population_structure.py  (Fri Sep 28 11:35:56 2012 -0400)
"""Convert a SNP table to admix.ped / admix.map with the admix_prep program.

Command line:
    prepare_population_structure.py SNP_FILE MIN_READS MIN_QUAL MIN_SPACING
        OUTPUT_HTML OUTPUT_FILES_PATH [all_individuals]
        [population:FILE:NAME ...] [individual:COL:NAME ...]
"""

import errno
import os
import shutil
import subprocess
import sys
from Population import Population
import gd_composite

################################################################################

def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list):
    """Render the composite-dataset info page and write it to *filename*.

    The page lists the two output files, the *using_info* text, the three
    filter settings and the populations.  *files_path* and *tags* are part
    of the signature but are not used here.
    """
    page = gd_composite.InfoPage()
    page.set_title('Prepare to look for population structure Galaxy Composite Dataset')

    as_file = gd_composite.DisplayFile()
    as_value = gd_composite.DisplayValue()

    outputs = [
        gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=as_file),
        gd_composite.Parameter(name='admix.map', value='admix.map', display_type=as_file),
        gd_composite.Parameter(description=using_info, display_type=as_value),
    ]
    for parameter in outputs:
        page.add_output_parameter(parameter)

    inputs = [
        gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=as_value),
        gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=as_value),
        gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=as_value),
    ]
    for parameter in inputs:
        page.add_input_parameter(parameter)

    page.add_misc(gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()))

    with open(filename, 'w') as ofh:
        print >> ofh, page.render()

def mkdir_p(path):
    """Create *path* including parents, tolerating a path that already exists."""
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise

def die(message, exit=True):
    """Write *message* to stderr and, unless *exit* is false, exit with status 1."""
    print >> sys.stderr, message
    if exit:
        sys.exit(1)

################################################################################

if len(sys.argv) < 9:
    die("Usage")

# parse command line
input_snp_filename, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:7]
extra_args = sys.argv[7:]

individual_metadata = []
population_files = []
population_names = []
all_individuals = False

# Both recognised prefixes ('individual:' and 'population:') are 11 characters
# long; arguments without a payload after the prefix are ignored.
for arg in extra_args:
    if arg == 'all_individuals':
        all_individuals = True
        continue
    if len(arg) <= 11:
        continue
    prefix, payload = arg[:11], arg[11:]
    if prefix == 'individual:':
        individual_metadata.append(payload)
    elif prefix == 'population:':
        pop_file, pop_name = payload.split(':', 1)
        population_files.append(pop_file)
        population_names.append(pop_name)

p_total = Population()
p_total.from_tag_list(individual_metadata)

# Maps each individual's tag to the first population it was seen in.
# It is filled in below but not read again in this script.
individual_population = {}

population_list = []

if all_individuals:
    p1 = p_total
    p1.name = 'All Individuals'
    population_list.append(p1)
else:
    # p1 accumulates the individuals of every population file.
    p1 = Population()
    for population_file, population_name in zip(population_files, population_names):
        this_pop = Population(population_name)
        this_pop.from_population_file(population_file)
        population_list.append(this_pop)
        p1.from_population_file(population_file)
        tags = p1.tag_list()
        for tag in tags:
            if tag not in individual_population:
                individual_population[tag] = population_name

if not p_total.is_superset(p1):
    die('There is an individual in the population that is not in the SNP table')

# run tool
tags = p1.tag_list()

command = ['admix_prep', input_snp_filename, min_reads, min_qual, min_spacing]
command.extend(tags)

#print "args:", ' '.join(command)
p = subprocess.Popen(command, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
(stdoutdata, stderrdata) = p.communicate()
rc = p.returncode

if rc != 0:
    die('admix_prep failed: rc={0}'.format(rc))

# admix_prep's stdout becomes the 'using' text on the info page; the program
# leaves admix.ped and admix.map in the current directory.
using_info = stdoutdata.rstrip('\r\n')
mkdir_p(output_files_path)
output_ped_filename = os.path.join(output_files_path, 'admix.ped')
output_map_filename = os.path.join(output_files_path, 'admix.map')
shutil.copy2('admix.ped', output_ped_filename)
shutil.copy2('admix.map', output_map_filename)
do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)

os.unlink('admix.ped')
os.unlink('admix.map')

sys.exit(0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepare_population_structure.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,118 @@ +<tool id="gd_prepare_population_structure" name="Prepare Input" version="1.0.0"> + <description>: Filter and convert to the format needed for these tools</description> + + <command interpreter="python"> + prepare_population_structure.py "$input" "$min_reads" "$min_qual" "$min_spacing" "$output" "$output.files_path" + #if $individuals.choice == '0' + "all_individuals" + #else if $individuals.choice == '1' + #for $population in $individuals.populations + #set $pop_arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name)) + "$pop_arg" + #end for + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = 'individual:%s:%s' % ($individual_col, $individual) + "$arg" + #end for + </command> + + <inputs> + <param name="input" type="data" format="gd_snp" label="SNP dataset" /> + <param name="min_reads" type="integer" min="0" value="0" label="Minimum reads covering a SNP, per individual" /> + <param name="min_qual" type="integer" min="0" value="0" label="Minimum quality value, per individual" /> + <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs on the same scaffold" /> + <conditional name="individuals"> + <param name="choice" type="select" label="Individuals"> + <option value="0" selected="true">All</option> + <option value="1">Choose</option> + </param> + <when value="0" /> + <when value="1"> + <repeat name="populations" title="Population" min="1"> + <param name="p_input" type="data" format="gd_indivs" label="Individuals" /> + </repeat> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output" format="gd_ped"> + <actions> + <action type="metadata" name="base_name" default="admix" /> + </actions> + </data> + </outputs> + + <tests> + <test> + <param name="input" 
value="test_in/sample.gd_snp" ftype="gd_snp" /> + <param name="min_reads" value="3" /> + <param name="min_qual" value="30" /> + <param name="min_spacing" value="0" /> + <param name="choice" value="0" /> + <output name="output" file="test_out/prepare_population_structure/prepare_population_structure.html" ftype="html" compare="diff" lines_diff="2"> + <extra_files type="file" name="admix.map" value="test_out/prepare_population_structure/admix.map" /> + <extra_files type="file" name="admix.ped" value="test_out/prepare_population_structure/admix.ped" /> + </output> + </test> + </tests> + + <help> + +**Dataset formats** + +The input datasets are in gd_snp_ and gd_indivs_ formats. It is important +for the Individuals datasets to have unique names; rename them if +necessary to make them unique. These names are used by the later tools in +the graphical displays. +The output dataset is gd_ped_. (`Dataset missing?`_) + +.. _gd_snp: ./static/formatHelp.html#gd_snp +.. _gd_indivs: ./static/formatHelp.html#gd_indivs +.. _gd_ped: ./static/formatHelp.html#gd_ped +.. _Dataset missing?: ./static/formatHelp.html + +----- + +**What it does** + +The tool converts a gd_snp dataset into two tables, called "admix.map" and +"admix.ped", needed for estimating the population structure. The user +can read or download those files, or simply pass this tool's output on to +other programs. The user imposes conditions on which SNPs to consider, +such as the minimum coverage and/or quality value for every individual, +or the distance to the closest SNP in the same contig (as named in the +first column of the SNP table). A useful piece of information produced +by the tool is the number of SNPs meeting those conditions, which can +be found by clicking on the eye icon in the history panel after the program +runs. 
+ +----- + +**Example** + +- input:: + + Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 + Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 + Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 + etc. + +- output map file:: + + 1 snp1 0 2 + 1 snp3 0 4 + 1 snp4 0 5 + 1 snp5 0 6 + 1 snp6 0 7 + 1 snp7 0 8 + 1 snp8 0 9 + 1 snp9 0 10 + +- output ped file:: + + PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rank_pathways.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,76 @@ +<tool id="gd_calc_freq" name="Rank Pathways" version="1.0.0"> + <description>: Assess the impact of gene sets on pathways</description> + + <command interpreter="python"> + #if str($output_format) == 'a' + calctfreq.py + #else if str($output_format) == 'b' + calclenchange.py + #end if + "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc" + "--species=${input.metadata.dbkey}" + "--input=${input}" + "--output=${output}" + "--posKEGGclmn=${input.metadata.kegg_path}" + "--KEGGgeneposcolmn=${input.metadata.kegg_gene}" + </command> + + <inputs> + <param name="input" type="data" format="gd_sap" label="Table"> + <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata. Click the pencil icon in the history item to edit/save the metadata attributes" /> + </param> + <param name="output_format" type="select" label="Output format"> + <option value="a" selected="true">ranked by percentage of genes affected</option> + <option value="b">ranked by change in length and number of paths</option> + </param> + </inputs> + + <outputs> + <data name="output" format="tabular" /> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" /> + <param name="output_format" value="a" /> + <output name="output" file="test_out/rank_pathways/rank_pathways.tabular" /> + </test> + </tests> + + <help> + +**What it does** + +This tool produces a table ranking the pathways based on the percentage +of genes in an input dataset, out of the total in each pathway. +Alternatively, the tool ranks the pathways based on the change in +length and number of paths connecting sources and sinks. This change is +calculated between graphs representing pathways with and without excluding +the nodes that represent the genes in an input list. 
Sources are all +the nodes representing the initial reactants/products in the pathway. +Sinks are all the nodes representing the final reactants/products in +the pathway. + +If pathways are ranked by percentage of genes affected, the output is +a tabular dataset with the following columns: + + 1. number of genes in the pathway present in the input dataset + 2. percentage of the total genes in the pathway included in the input dataset + 3. rank of the frequency (from high freq to low freq) + 4. name of the pathway + +If pathways are ranked by change in length and number of paths, the +output is a tabular dataset with the following columns: + + 1. change in the mean length of paths between sources and sinks + 2. mean length of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I) + 3. mean length of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I) + 4. rank of the change in the mean length of paths between sources and sinks (from high change to low change) + 5. change in the number of paths between sources and sinks + 6. number of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C) + 7. number of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C) + 8. rank of the change in the number of paths between sources and sinks (from high change to low change) + 9. name of the pathway + + </help> +</tool>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#       rtrnKEGGpthwfENSEMBLTc.py
#
#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
#
#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 2 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#       MA 02110-1301, USA.
#
# diff: --- /dev/null  +++ b/rtrnKEGGpthwfENSEMBLTc.py  (Fri Sep 28 11:35:56 2012 -0400)

import argparse
import os
import sys


def _find_cdf_path(loc_file, species):
    """Return the conversion-dictionary path listed for *species* in *loc_file*.

    Each usable line of the loc file is whitespace separated, with the
    species name in the first field and the dictionary path in the second.
    Blank or incomplete lines are skipped.  Exits with a message when the
    species is not listed.
    """
    with open(loc_file) as fh:
        for line in fh:
            fields = line.split()
            if len(fields) >= 2 and fields[0] == species:
                return fields[1]
    sys.exit('species "%s" not found in %s' % (species, loc_file))


def _load_cdf(path):
    """Read the tab-separated conversion dictionary at *path*.

    The first column is the key; the remaining columns, still joined by
    tabs, are the value.  Later duplicates of a key replace earlier ones.
    """
    mapping = {}
    with open(path) as fh:
        for line in fh.read().splitlines():
            if line.strip():
                key, _sep, rest = line.partition('\t')
                mapping[key] = rest
    return mapping


def main(argv=None):
    """Append KEGG gene and pathway columns to a table of ENSEMBL transcripts.

    *argv* is the list of command-line options (defaults to
    ``sys.argv[1:]``).  Lines of the input table starting with ``#`` are
    dropped; every other line is written to the output followed by the
    dictionary entry for its ENSEMBL transcript code, or by ``U`` and ``N``
    when the code is not in the dictionary.  Returns 0.
    """
    parser = argparse.ArgumentParser(description='Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes.')
    parser.add_argument('--loc_file',metavar='correlational database',type=str,required=True,help='correlational database')
    parser.add_argument('--species',metavar='species name',type=str,required=True,help='the species of interest in loc_file')
    parser.add_argument('--output',metavar='output TXT file',type=str,required=True,help='the output file with the table in txt format. The output will have two more fields: KEGG gene codes and KEGG pathways of each ENSEMBL code' )
    parser.add_argument('--posENSEMBLclmn',metavar='column number',type=int,required=True,help='the column with the ENSEMBLE transcript code')
    parser.add_argument('--input',metavar='input TXT file',type=str,required=True,help='the input file with the table in txt format')
    if argv is None:
        argv = sys.argv[1:]
    options = parser.parse_args(argv)

    # The column is given 1-based on the command line.
    column = options.posENSEMBLclmn - 1

    cdf_path = _find_cdf_path(options.loc_file, options.species)
    kegg_by_transcript = _load_cdf(cdf_path)
    unknown = '\t'.join(['U', 'N'])

    rows = []
    with open(options.input) as fh:
        for line in fh:
            if line.startswith('#'):
                continue
            fields = line.rstrip('\r\n').split('\t')
            kegg = kegg_by_transcript.get(fields[column], unknown)
            rows.append('\t'.join(['\t'.join(fields), kegg]))

    # Rows are newline separated; no newline follows the last row.
    with open(options.output, 'w') as out:
        out.write('\n'.join(rows))
    return 0


if __name__ == '__main__':
    sys.exit(main())
#!/usr/bin/env python
# diff: --- /dev/null  +++ b/select_snps.py  (Fri Sep 28 11:35:56 2012 -0400)
"""Select a requested number of SNPs, roughly uniformly spaced on the reference.

Comment lines of the input are copied to the output unchanged, followed by
the selected SNP lines in their original input order.
"""

import os
import sys
import math
from optparse import OptionParser
import genome_diversity as gd

def main_function(parse_arguments=None):
    """Build a decorator that turns ``f(options, arguments)`` into an entry point.

    The decorated function takes an optional argument list (``sys.argv`` by
    default), passes it through *parse_arguments* and exits the process with
    the wrapped function's return value.  Without *parse_arguments* the
    options are ``None`` and the argument list is passed on untouched.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: (None, arguments)
    def main_decorator(to_decorate):
        def decorated_main(arguments=None):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments(arguments)
            sys.exit(to_decorate(options, arguments))
        return decorated_main
    return main_decorator

def parse_arguments(arguments):
    """Parse the command line (program name in ``arguments[0]``) with optparse."""
    parser = OptionParser()
    parser.add_option('--input', dest='input')
    parser.add_option('--output', dest='output')
    parser.add_option('--index_dir', dest='index_dir')
    parser.add_option('--num_snps', dest='num_snps')
    parser.add_option('--ref_chrom_col', dest='ref_chrom_col')
    parser.add_option('--ref_pos_col', dest='ref_pos_col')
    parser.add_option('--ref_species', dest='ref_species')
    return parser.parse_args(arguments[1:])

@main_function(parse_arguments)
def main(options, arguments):
    """Validate the options, pick the SNPs and write the output file."""
    # Columns are 1-based on the command line; to_int() yields 0 for anything
    # unusable, which becomes the invalid index -1 here.
    ref_chrom_idx = to_int( options.ref_chrom_col ) - 1
    ref_pos_idx = to_int( options.ref_pos_col ) - 1

    # Index 0 (column 1) is a legal column, so only negative indexes are rejected.
    if (ref_chrom_idx < 0) or (ref_pos_idx < 0) or (ref_chrom_idx == ref_pos_idx):
        sys.stderr.write("Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset.\n")
        sys.exit(1)

    total_requested = to_int( options.num_snps )
    if total_requested < 1:
        # Guards the divisions in select_snps() and
        # fix_selection_and_order_like_input().
        sys.stderr.write("The number of SNPs to select must be a positive integer.\n")
        sys.exit(1)

    chrom_len_root = os.path.join( options.index_dir, 'shared/ucsc/chrom')
    chrom_len_file = '%s.len' % options.ref_species
    chrom_len_path = os.path.join(chrom_len_root, chrom_len_file)

    chrlens = gd.ChrLens( chrom_len_path )

    total_len = 0
    for chrom in chrlens:
        total_len += chrlens.length(chrom)

    lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx )
    selected = select_snps( data, total_len, total_requested )
    out_data = fix_selection_and_order_like_input(data, selected, total_requested)
    write_selected_snps( options.output, out_data, lines, comments )

def to_int( value ):
    """Return ``int(value)``, or 0 when *value* is missing or not an integer."""
    try:
        int_value = int( value )
    except (TypeError, ValueError):
        # TypeError covers an option that was not supplied (None).
        int_value = 0
    return int_value

def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ):
    """Read the SNP table and return ``(lines, data, comments)``.

    *lines* holds the usable data lines in input order and *comments* the
    lines starting with ``#``.  *data* has one record per usable line,
    ``[chrom_sort, chrom, pos, line_num, line_idx]``, sorted by chromosome
    and position; ``line_idx`` indexes into *lines* and ``line_num`` is the
    1-based line number in the file.  Empty lines and lines with too few
    columns are ignored.  A non-integer position is fatal.
    """
    needed = max( chrom_idx, pos_idx ) + 1
    lines = []
    data = []
    comments = []
    line_idx = 0
    with open( filename, 'r' ) as fh:
        for line_num, line in enumerate( fh, 1 ):
            line = line.rstrip('\r\n')
            if not line:
                continue
            if line.startswith('#'):
                comments.append(line)
                continue
            elems = line.split('\t')
            if len(elems) < needed:
                continue
            chrom = elems[chrom_idx]
            try:
                pos = int(elems[pos_idx])
            except ValueError:
                sys.stderr.write( "bad reference position in line %d column %d: %s\n" % ( line_num, pos_idx+1, elems[pos_idx] ) )
                sys.exit(1)
            lines.append(line)
            # Drop a leading 'chr' prefix for sorting.  str.lstrip('chr')
            # would instead strip any run of the characters c, h and r.
            if chrom.startswith('chr'):
                chrom_sort = chrom[3:]
            else:
                chrom_sort = chrom
            data.append( [chrom_sort, chrom, pos, line_num, line_idx] )
            line_idx += 1
    # The full name is a tie-breaker so that two chromosomes sharing a sort
    # name (e.g. 'chr1' and '1') stay contiguous for select_snps().
    data = sorted( data, key=lambda x: (x[0], x[1], x[2]) )
    return lines, data, comments

def select_snps( data, total_len, requested ):
    """Return indexes into *data* of SNPs about ``total_len / requested`` apart.

    *data* must be sorted by chromosome and position.  On each chromosome a
    SNP is taken when its position reaches the next threshold, which then
    advances by one spacing; the threshold restarts at 0 on every new
    chromosome.
    """
    old_chrom = None
    next_print = 0
    selected = []
    space = total_len // requested
    for data_idx, datum in enumerate( data ):
        chrom = datum[1]
        pos = datum[2]
        if chrom != old_chrom:
            old_chrom = chrom
            next_print = 0
        if pos >= next_print:
            selected.append(data_idx)
            next_print += space
    return selected

def fix_selection_and_order_like_input(data, selected, requested):
    """Thin *selected* down to *requested* records and restore input order.

    When more SNPs were selected than requested, evenly spread entries of
    *selected* are kept; otherwise all of them are.  The resulting records
    are sorted by their original line number.
    """
    total_selected = len( selected )

    if total_selected > requested:
        a = float( total_selected ) / requested
        b = a / 2
        # Ranks within `selected` to keep; a set makes the lookups O(1).
        keep = set( int( math.ceil( i * a + b ) - 1 ) for i in range( requested ) )
        out_data = [ data[data_idx] for i, data_idx in enumerate(selected) if i in keep ]
    else:
        out_data = [ data[data_idx] for data_idx in selected ]

    out_data = sorted( out_data, key=lambda x: x[3] )

    return out_data

def write_selected_snps( filename, data, lines, comments ):
    """Write *comments*, then the line of every record in *data*, to *filename*."""
    with open( filename, 'w' ) as fh:
        for comment in comments:
            fh.write("%s\n" % comment )

        for datum in data:
            line_idx = datum[4]
            fh.write("%s\n" % lines[line_idx])

if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/select_snps.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,89 @@ +<tool id="gd_select_snps" name="Sample SNPs" version="1.0.0"> + <description>: Select a specified number of SNPs, uniformly spaced</description> + + <command interpreter="python"> + select_snps.py "--input=$input" "--output=$output" "--index_dir=$GALAXY_DATA_INDEX_DIR" "--num_snps=$num_snps" + #if $override_metadata.choice == "0": + "--ref_chrom_col=${input.metadata.ref}" "--ref_pos_col=${input.metadata.rPos}" "--ref_species=${input.metadata.dbkey}" + #else + "--ref_chrom_col=$ref_col" "--ref_pos_col=$rpos_col" "--ref_species=$ref_species" + #end if + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"> + <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" /> + </param> + <param name="num_snps" type="integer" value="10" optional="false" min="1" label="Number of SNPs"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/> + <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/> + <param name="ref_species" type="select" label="Choose reference species"> + <options from_file="gd.ref_species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + </inputs> + + <outputs> + <data format="gd_snp" name="output" metadata_source="input"/> + </outputs> + + <tests> + <test> + <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp"/> + <param 
name="num_snps" value="100"/> + <param name="choice" value="0"/> + <output name="output" file="test_out/select_snps/select_snps.gd_snp" /> + </test> + </tests> + + + <help> + +**What it does** + + This tool attempts to select a specified number of SNPs from the dataset, making them + approximately uniformly spaced relative to the reference genome. The number + actually selected may be slightly more than the specified number. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. 
+ +- output file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + etc. + + </help> +</tool>
--- a/specify.xml Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -<tool id="gd_specify" name="Specify Individuals" version="1.0.0"> - <description>: Define a collection of individuals from a gd_snp dataset</description> - - <command interpreter="bash"> - echo.bash "$input" "$output" - #for $individual in str($individuals).split(',') - #set $individual_idx = $input.dataset.metadata.individual_names.index($individual) - #set $individual_col = str( $input.dataset.metadata.individual_columns[$individual_idx] ) - #set $arg = '\t'.join([$individual_col, $individual, '']) - "$arg" - #end for - </command> - - <inputs> - <param name="input" type="data" format="gd_snp" label="SNP dataset"/> - <param name="individuals" type="select" display="checkboxes" multiple="true" label="Individuals to include"> - <options> - <filter type="data_meta" ref="input" key="individual_names" /> - </options> - <validator type="no_options" message="You must select at least one individual."/> - </param> - <param name="outname" type="text" size="20" label="Label for this collection"> - <validator type="empty_field" message="You must enter a label."/> - #used to be "Individuals from ${input.hid}" - </param> - </inputs> - - <outputs> - <data name="output" format="gd_indivs" label="${outname}" /> - </outputs> - - <tests> - <test> - <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" /> - <param name="individuals" value="PB1,PB2" /> - <output name="output" file="test_in/a.gd_indivs" /> - </test> - </tests> - - <help> - -**Dataset formats** - -The input dataset is in gd_snp_ format; -the output is in gd_indivs_ format. (`Dataset missing?`_) - -.. _gd_snp: ./static/formatHelp.html#gd_snp -.. _gd_indivs: ./static/formatHelp.html#gd_indivs -.. _Dataset missing?: ./static/formatHelp.html - ------ - -**What it does** - -This tool makes a list of selected entities (the sets of four columns -representing individuals or groups) from a gd_snp dataset. 
It does not copy -the SNP data; it just records which entities should be considered as belonging -to some collection or population. The label you specify is used to name the -output dataset in your history. This list can then be used to instruct other -tools to work on just part of the original gd_snp dataset. - ------ - -**Example** - -- input:: - - Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 - Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 - Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 - etc. - -- input metadata:: - - #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc", - #"1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q", - #"pair","dist","prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]], - #"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} - -- output when individuals PB1, PB2, and PB3 are selected:: - - 9 PB1 - 13 PB2 - 17 PB3 - - </help> -</tool>
#!/usr/bin/env python
# Source: specify_restriction_enzymes.py, added by this changeset (Fri Sep 28 11:35:56 2012 -0400).
"""Keep the SNPs that are cut by at least one of the chosen restriction enzymes.

For every SNP in the input, the enzymes recorded for it in the species'
primers file are looked up; the SNP line is copied to the output when any of
them appears in the ``--enzyme_list`` given on the command line.
"""

import os
import sys
import traceback
from optparse import OptionParser


def main_function(parse_arguments=None):
    """Build a decorator that turns ``f(options, arguments)`` into a script entry point.

    The decorated function accepts an optional argument list (defaulting to
    ``sys.argv``), runs it through ``parse_arguments`` and always terminates
    the process with ``sys.exit``: with the wrapped function's return value on
    success, or with 1 after reporting the error and its traceback on stderr
    when the wrapped function raises.

    parse_arguments -- callable mapping the raw argument list to an
                       ``(options, arguments)`` pair; when omitted the
                       arguments are passed through and options is None.
    """
    if parse_arguments is None:
        parse_arguments = lambda arguments: (None, arguments)

    def main_decorator(to_decorate):
        def decorated_main(arguments=None):
            if arguments is None:
                arguments = sys.argv
            options, arguments = parse_arguments(arguments)
            rc = 1
            try:
                rc = to_decorate(options, arguments)
            except Exception:
                # sys.exc_info() instead of "except Exception, err" keeps this
                # handler valid on both Python 2 and Python 3.
                err = sys.exc_info()[1]
                sys.stderr.write('ERROR: %s\n' % str(err))
                # The original never imported traceback, so this call raised
                # NameError, which the sys.exit() below then masked: the
                # traceback was silently never printed.
                traceback.print_exc()
            finally:
                sys.exit(rc)
        return decorated_main
    return main_decorator


def parse_arguments(arguments):
    """Parse the command line; ``arguments[0]`` (the program name) is skipped.

    Returns the ``(options, positional_arguments)`` pair from optparse.  The
    two column options are converted to int by optparse; the rest are strings
    (or None when absent).
    """
    parser = OptionParser()
    parser.add_option('--input',
                      type='string', dest='input',
                      help='file of selected SNPs')
    parser.add_option('--output',
                      type='string', dest='output',
                      help='output file')
    parser.add_option('--primers_loc',
                      type='string', dest='primers_loc',
                      help='primers .loc file')
    parser.add_option('--scaffold_col',
                      type="int", dest='scaffold_col',
                      help='scaffold column in the input file')
    parser.add_option('--pos_col',
                      type="int", dest='pos_col',
                      help='position column in the input file')
    parser.add_option('--enzyme_list',
                      type="string", dest='enzyme_list_string',
                      help='comma separated list of enzymes')
    parser.add_option('--species',
                      type="string", dest='species',
                      help='species')
    return parser.parse_args(arguments[1:])


@main_function(parse_arguments)
def main(options, arguments):
    """Filter the input SNPs by restriction enzyme and write the survivors.

    Raises RuntimeError when a required option is missing or empty; note that
    a column value of 0 is also treated as missing by these truthiness checks.
    The input's comment lines are written once, just before the first
    matching SNP, so an input without matches yields an empty output file.
    """
    # Imported here rather than at module level so the argument handling above
    # can be imported and tested without the project-local genome_diversity
    # package on the path; main() is its only user.
    import genome_diversity as gd

    if not options.input:
        raise RuntimeError('missing --input option')
    if not options.output:
        raise RuntimeError('missing --output option')
    if not options.primers_loc:
        raise RuntimeError('missing --primers_loc option')
    if not options.scaffold_col:
        raise RuntimeError('missing --scaffold_col option')
    if not options.pos_col:
        raise RuntimeError('missing --pos_col option')
    if not options.enzyme_list_string:
        raise RuntimeError('missing --enzyme_list option')
    if not options.species:
        raise RuntimeError('missing --species option')

    snps = gd.SnpFile(filename=options.input, seq_col=int(options.scaffold_col), pos_col=int(options.pos_col))

    out_fh = gd._openfile(options.output, 'w')
    try:
        # Enzyme names requested by the user, blanks from stray commas dropped.
        wanted_enzymes = set()
        for enzyme in options.enzyme_list_string.split(','):
            enzyme = enzyme.strip()
            if enzyme:
                wanted_enzymes.add(enzyme)

        # The primers index is expected next to the data file, with the data
        # file's extension replaced by ".cdb".
        primer_data_file = gd.get_filename_from_loc(options.species, options.primers_loc)
        file_root, file_ext = os.path.splitext(primer_data_file)
        primer_index_file = file_root + ".cdb"
        primers = gd.PrimersFile(data_file=primer_data_file, index_file=primer_index_file)

        comments_printed = False

        while snps.next():
            seq, pos = snps.get_seq_pos()
            for enzyme in primers.get_enzymes(seq, pos):
                if enzyme in wanted_enzymes:
                    if not comments_printed:
                        for comment in snps.comments:
                            out_fh.write("%s\n" % comment)
                        comments_printed = True
                    out_fh.write("%s\n" % snps.line)
                    # One matching enzyme is enough; emit the SNP only once.
                    break
    finally:
        # Close the output even when a lookup above raises.
        out_fh.close()


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/specify_restriction_enzymes.xml Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,90 @@ +<tool id="gd_specify_restriction_enzymes" name="Differential Cleavage" version="1.0.0"> + <description>: Select SNPs differentially cut by specified restriction enzymes</description> + + <command interpreter="python"> + specify_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc" + #if $override_metadata.choice == "0": + "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}" + #else + "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species" + #end if + "--enzyme_list=$enzymes" + </command> + + <inputs> + <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/> + <conditional name="override_metadata"> + <param name="choice" type="select" format="integer" label="choose columns"> + <option value="0" selected="true">No, get columns from metadata</option> + <option value="1" >Yes, choose columns</option> + </param> + <when value="0" /> + <when value="1"> + <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/> + <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/> + <param name="species" type="select" label="Choose species"> + <options from_file="gd.species.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + + <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes"> + <options from_file="gd.restriction_enzymes.txt"> + <column name="name" index="0"/> + <column name="value" index="1"/> + </options> + </param> + </inputs> + + <outputs> + <data format="gd_snp" name="output" metadata_source="input"/> + </outputs> + + <tests> + <test> + <param name="input" 
value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" /> + <param name="choice" value="0" /> + <param name="enzymes" value="Bsp1286I,HaeII,RsaI" /> + <output name="output" file="test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp" /> + </test> + </tests> + + <help> + +**What it does** + + It selects the SNPs that are differentially cut by at least one of the + specified restriction enzymes. The enzymes are required to cut the amplified + segment (for the specified PCR primers) only at the SNP. + +----- + +**Example** + +- input file:: + + chr2_75111355_75112576 314 A C L F chr2 75111676 C F 15 4 53 2 9 48 Y 96 0.369 0.355 0.396 0 + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr10_7434473_7435447 524 T C S S chr10 7435005 T S 11 5 90 14 0 69 Y 626 0.066 0.406 0.727 0 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr15_64470252_64471048 89 G A Y Y chr15 64470341 G Y 5 6 109 14 0 69 Y 312 0.247 0.998 0.393 0 + chr18_48070585_48071386 514 C T E K chr18 48071100 T K 7 7 46 14 0 69 Y 2 0.200 0.032 0.163 0 + chr18_50154905_50155664 304 A G Y C chr18 50155208 A Y 4 2 17 5 1 22 Y 8 0.022 0.996 0.128 0 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_14240610_14242055 232 C T A V chr19 14240840 C A 18 8 56 15 5 42 Y 73 0.003 0.153 0.835 0 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. + +- output file:: + + chr8_93901796_93905612 2471 A C A A chr8 93904264 A A 8 0 51 10 2 14 Y 961 0.016 0.534 0.114 2 + chr14_80021455_80022064 138 G A H H chr14 80021593 G H 14 0 69 9 6 124 Y 377 0.118 0.997 0.195 1 + chr18_57379354_57380496 315 C T V V chr18 57379669 G V 11 0 60 9 6 62 Y 726 0.118 0.048 0.014 1 + chr19_39866997_39874915 3117 C T P P chr19 39870110 C P 3 7 65 14 2 32 Y 6 0.321 0.911 0.462 4 + etc. + + </help> +</tool>
--- a/test-data/test_in/a.gd_indivs Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -9 PB1 -13 PB2
--- a/test-data/test_in/b.gd_indivs Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -17 PB3 -21 PB4
--- a/test-data/test_in/c.gd_indivs Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -25 PB6 -29 PB8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in/ensembl.tabular Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,150 @@ +ENSCAFT00000000001 +ENSCAFT00000000144 +ENSCAFT00000000160 +ENSCAFT00000000215 +ENSCAFT00000000233 +ENSCAFT00000000365 +ENSCAFT00000000507 +ENSCAFT00000000517 +ENSCAFT00000000674 +ENSCAFT00000000724 +ENSCAFT00000000760 +ENSCAFT00000000762 +ENSCAFT00000001047 +ENSCAFT00000001052 +ENSCAFT00000001063 +ENSCAFT00000001076 +ENSCAFT00000001104 +ENSCAFT00000001141 +ENSCAFT00000001146 +ENSCAFT00000001204 +ENSCAFT00000001219 +ENSCAFT00000001250 +ENSCAFT00000001352 +ENSCAFT00000001363 +ENSCAFT00000001421 +ENSCAFT00000001523 +ENSCAFT00000001575 +ENSCAFT00000001587 +ENSCAFT00000001597 +ENSCAFT00000002056 +ENSCAFT00000002100 +ENSCAFT00000002110 +ENSCAFT00000002175 +ENSCAFT00000002259 +ENSCAFT00000002460 +ENSCAFT00000002537 +ENSCAFT00000002577 +ENSCAFT00000002578 +ENSCAFT00000002660 +ENSCAFT00000002792 +ENSCAFT00000002849 +ENSCAFT00000002999 +ENSCAFT00000003163 +ENSCAFT00000003223 +ENSCAFT00000003307 +ENSCAFT00000003515 +ENSCAFT00000003560 +ENSCAFT00000003644 +ENSCAFT00000003824 +ENSCAFT00000003840 +ENSCAFT00000004092 +ENSCAFT00000004103 +ENSCAFT00000004208 +ENSCAFT00000004253 +ENSCAFT00000004311 +ENSCAFT00000004464 +ENSCAFT00000004511 +ENSCAFT00000004609 +ENSCAFT00000004673 +ENSCAFT00000004726 +ENSCAFT00000004799 +ENSCAFT00000004933 +ENSCAFT00000004993 +ENSCAFT00000005126 +ENSCAFT00000005142 +ENSCAFT00000005225 +ENSCAFT00000005323 +ENSCAFT00000005467 +ENSCAFT00000005496 +ENSCAFT00000005518 +ENSCAFT00000005653 +ENSCAFT00000005746 +ENSCAFT00000005749 +ENSCAFT00000005832 +ENSCAFT00000005972 +ENSCAFT00000006025 +ENSCAFT00000006114 +ENSCAFT00000006157 +ENSCAFT00000006219 +ENSCAFT00000006272 +ENSCAFT00000006453 +ENSCAFT00000006479 +ENSCAFT00000006507 +ENSCAFT00000006669 +ENSCAFT00000006689 +ENSCAFT00000006827 +ENSCAFT00000006891 +ENSCAFT00000007130 +ENSCAFT00000007145 +ENSCAFT00000007244 +ENSCAFT00000007375 +ENSCAFT00000007440 +ENSCAFT00000007467 
+ENSCAFT00000007484 +ENSCAFT00000007527 +ENSCAFT00000007553 +ENSCAFT00000007697 +ENSCAFT00000007703 +ENSCAFT00000007747 +ENSCAFT00000007774 +ENSCAFT00000007776 +ENSCAFT00000007779 +ENSCAFT00000007859 +ENSCAFT00000007951 +ENSCAFT00000007959 +ENSCAFT00000008012 +ENSCAFT00000008063 +ENSCAFT00000008142 +ENSCAFT00000008198 +ENSCAFT00000008413 +ENSCAFT00000008540 +ENSCAFT00000008586 +ENSCAFT00000008588 +ENSCAFT00000008673 +ENSCAFT00000008678 +ENSCAFT00000008728 +ENSCAFT00000008769 +ENSCAFT00000008831 +ENSCAFT00000009074 +ENSCAFT00000009114 +ENSCAFT00000009614 +ENSCAFT00000009698 +ENSCAFT00000009710 +ENSCAFT00000010094 +ENSCAFT00000010141 +ENSCAFT00000010439 +ENSCAFT00000010496 +ENSCAFT00000010516 +ENSCAFT00000010531 +ENSCAFT00000010559 +ENSCAFT00000010593 +ENSCAFT00000010616 +ENSCAFT00000010630 +ENSCAFT00000010829 +ENSCAFT00000010865 +ENSCAFT00000010931 +ENSCAFT00000010977 +ENSCAFT00000010988 +ENSCAFT00000011187 +ENSCAFT00000011380 +ENSCAFT00000011397 +ENSCAFT00000011721 +ENSCAFT00000011730 +ENSCAFT00000011771 +ENSCAFT00000011789 +ENSCAFT00000011968 +ENSCAFT00000012081 +ENSCAFT00000012133 +ENSCAFT00000012159 +ENSCAFT00000012254
--- a/test-data/test_in/sample.gd_sap Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,401 +0,0 @@ -#{"column_names":["contig","pos","ref","rPos","trns","pep","AA1","loc","AA2","KEGG","pred","path"],"pos":2,"rPos":4,"ref":3,"dbkey":"canFam2","scaffold":1,"species":"bear","kegg_gene":10,"kegg_path":12} -Contig39_chr1_3261104_3261850 414 chr1 3261546 ENSCAFT00000000001 ENSCAFP00000000001 S 667 F 476153 probably damaging cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways -Contig62_chr1_19011969_19012646 265 chr1 19012240 ENSCAFT00000000144 ENSCAFP00000000125 * 161 R 483960 probably damaging N -Contig36_chr1_20102654_20103213 365 chr1 20103029 ENSCAFT00000000160 ENSCAFP00000000140 R 407 Q 610160 possibly damaging N -Contig136_chr10_3710404_3714591 3079 chr10 3713499 ENSCAFT00000000215 ENSCAFP00000000194 T 103 P U benign N -Contig36_chr1_23682012_23682647 374 chr1 23682388 ENSCAFT00000000233 ENSCAFP00000000210 N 234 S 483973 benign N -Contig163_chr10_4573526_4574494 487 chr10 4574010 ENSCAFT00000000365 ENSCAFP00000000332 R 186 K 474414 benign cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis -Contig55_chr1_40056604_40059808 2081 chr1 40058686 ENSCAFT00000000507 ENSCAFP00000000458 I 247 K 484023 possibly damaging N -Contig17_chr1_40203628_40205630 1417 chr1 40205044 ENSCAFT00000000517 ENSCAFP00000000468 N 109 S 476233 benign N -Contig97_chr1_44847984_44848380 285 chr1 44848272 ENSCAFT00000000674 ENSCAFP00000000618 Q 27 R 611986 benign N -Contig214_chr10_16106753_16106969 121 chr10 16106873 ENSCAFT00000000724 ENSCAFP00000000668 A 301 T 609478 benign N -Contig75_chr1_45731970_45732932 436 chr1 45732397 ENSCAFT00000000760 ENSCAFP00000000701 I 490 V U benign N -Contig33_chr1_45614845_45617413 1835 chr1 45616685 ENSCAFT00000000760 ENSCAFP00000000701 A 4390 V U 
benign N -Contig95_chr10_18829724_18831056 914 chr10 18830645 ENSCAFT00000000762 ENSCAFP00000000703 A 512 V U possibly damaging N -Contig197_chr13_8622062_8623071 606 chr13 8622665 ENSCAFT00000001047 ENSCAFP00000000959 T 406 I 475067 possibly damaging cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways -Contig243_chr10_19959210_19960069 701 chr10 19959858 ENSCAFT00000001052 ENSCAFP00000000964 E 1345 K U benign N -Contig137_chr13_10622950_10624043 1039 chr13 10623979 ENSCAFT00000001063 ENSCAFP00000000975 E 10 K 481999 benign N -Contig137_chr13_10622950_10624043 1006 chr13 10623946 ENSCAFT00000001063 ENSCAFP00000000975 R 21 C 481999 probably damaging N -Contig115_chr12_4411478_4412322 124 chr12 4411614 ENSCAFT00000001076 ENSCAFP00000000986 R 177 H U benign N -Contig150_chr12_4438230_4439944 385 chr12 4438614 ENSCAFT00000001104 ENSCAFP00000001014 Y 277 D 607591 benign N -Contig84_chr1_52076858_52077103 80 chr1 52076943 ENSCAFT00000001141 ENSCAFP00000001046 C 147 Y 484064 benign N -Contig29_chr13_13215547_13217183 793 chr13 13216352 ENSCAFT00000001146 ENSCAFP00000001050 P 1 R 475076 probably damaging N -Contig251_chr10_22876556_22877097 152 chr10 22876714 ENSCAFT00000001204 ENSCAFP00000001103 E 1162 D 481203 benign N -Contig21_chr10_22964856_22965302 202 chr10 22965058 ENSCAFT00000001219 ENSCAFP00000001115 P 6 Q 474465 benign N -Contig199_chr12_5083018_5084534 453 chr12 5083472 ENSCAFT00000001250 ENSCAFP00000001144 I 185 T 481729.481731 benign N.cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex 
infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis -Contig41_chr13_21629998_21630487 161 chr13 21630157 ENSCAFT00000001352 ENSCAFP00000001239 P 729 S 482026 possibly damaging cfa00565=Ether lipid metabolism -Contig16_chr13_21786766_21788016 169 chr13 21786927 ENSCAFT00000001363 ENSCAFP00000001249 V 1142 A 475084 benign cfa03022=Basal transcription factors -Contig60_chr1_60333035_60333884 731 chr1 60333755 ENSCAFT00000001421 ENSCAFP00000001307 V 400 I 484096 benign N -Contig44_chr13_24555640_24556298 499 chr13 24556139 ENSCAFT00000001523 ENSCAFP00000001400 N 660 S 475088 benign N -Contig153_chr12_5955114_5958935 2950 chr12 5958094 ENSCAFT00000001575 ENSCAFP00000001449 E 13 D 481744 benign cfa04141=Protein processing in endoplasmic reticulum -Contig146_chr13_25076435_25077249 723 chr13 25077165 ENSCAFT00000001587 ENSCAFP00000001461 T 9 S 482035 benign N -Contig81_chr13_25579918_25582207 874 chr13 25580772 ENSCAFT00000001597 ENSCAFP00000001469 E 62 G 609411 benign N -Contig159_chr10_28604683_28606028 753 chr10 28605433 ENSCAFT00000002056 ENSCAFP00000001903 S 79 P 610014 benign N -Contig30_chr11_29945215_29949829 3973 chr11 29949181 ENSCAFT00000002100 ENSCAFP00000001944 M 282 T U benign N -Contig102_chr10_29039231_29041280 829 chr10 29040065 ENSCAFT00000002110 ENSCAFP00000001953 R 311 Q 481249 unknown N -Contig187_chr1_78583588_78584279 250 chr1 78583839 ENSCAFT00000002175 ENSCAFP00000002014 K 176 R 476310 benign N -Contig199_chr1_79234891_79237527 384 chr1 79235278 ENSCAFT00000002259 ENSCAFP00000002095 V 403 A 484151 benign N -Contig119_chr12_12212738_12214663 1005 chr12 12213720 ENSCAFT00000002460 ENSCAFP00000002280 R 749 Q 481785 possibly damaging N -Contig119_chr12_12212738_12214663 918 chr12 12213633 ENSCAFT00000002460 ENSCAFP00000002280 R 778 Q 481785 benign N -Contig39_chr14_10730123_10732539 
335 chr14 10730462 ENSCAFT00000002537 ENSCAFP00000002356 V 1179 E U benign N -Contig41_chr1_84886710_84894794 3494 chr1 84890207 ENSCAFT00000002577 ENSCAFP00000002394 E 1089 K 484157 possibly damaging N -Contig182_chr12_13881114_13883427 1690 chr12 13882828 ENSCAFT00000002578 ENSCAFP00000002395 S 99 G 608906 benign N -Contig34_chr11_48151988_48152712 198 chr11 48152205 ENSCAFT00000002660 ENSCAFP00000002468 C 587 R U possibly damaging N -Contig37_chr10_34118256_34119269 437 chr10 34118687 ENSCAFT00000002792 ENSCAFP00000002588 A 377 T 474523 benign N -Contig21_chr14_16091274_16093278 716 chr14 16091997 ENSCAFT00000002849 ENSCAFP00000002642 R 126 C 475216 probably damaging N -Contig57_chr1_90983602_90984717 559 chr1 90984158 ENSCAFT00000002999 ENSCAFP00000002781 A 226 V U benign N -Contig45_chr12_15798569_15798849 141 chr12 15798709 ENSCAFT00000003163 ENSCAFP00000002938 N 342 S 474921 benign cfa03040=Spliceosome -Contig83_chr12_17852905_17859596 2392 chr12 17855305 ENSCAFT00000003223 ENSCAFP00000002995 E 770 Q 474925 benign N -Contig41_chr12_18725392_18725889 169 chr12 18725560 ENSCAFT00000003307 ENSCAFP00000003070 R 80 Q 609995 benign N -Contig9_chr14_26125779_26127414 486 chr14 26126264 ENSCAFT00000003515 ENSCAFP00000003259 P 123 T 482316 benign N -Contig132_chr1_101565951_101566612 255 chr1 101566210 ENSCAFT00000003560 ENSCAFP00000003298 L 588 F U unknown N -Contig142_chr1_102093954_102094392 121 chr1 102094072 ENSCAFT00000003644 ENSCAFP00000003373 K 120 E 484216 benign cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00970=Aminoacyl-tRNA biosynthesis -Contig129_chr14_34071666_34074617 2313 chr14 34073957 ENSCAFT00000003824 ENSCAFP00000003537 T 282 I 475249 probably damaging N -Contig147_chr14_34262125_34262938 340 chr14 34262468 ENSCAFT00000003840 ENSCAFP00000003553 I 70 V 482333 benign N -Contig52_chr12_36031985_36035244 1237 chr12 36033208 ENSCAFT00000004092 ENSCAFP00000003784 Y 564 H 474960 benign N -Contig176_chr1_105494865_105495258 119 chr1 105494995 
ENSCAFT00000004103 ENSCAFP00000003793 A 406 V 484298 benign N -Contig60_chr11_63130652_63131816 702 chr11 63131349 ENSCAFT00000004208 ENSCAFP00000003892 V 260 I 481637 benign N -Contig9_chr10_53579958_53582510 688 chr10 53580646 ENSCAFT00000004253 ENSCAFP00000003937 S 191 G 100534006.100534007.474588 benign N -Contig93_chr14_38451661_38452163 221 chr14 38451882 ENSCAFT00000004311 ENSCAFP00000003990 A 420 V 482346 benign N -Contig70_chr12_42859511_42860010 180 chr12 42859693 ENSCAFT00000004464 ENSCAFP00000004126 P 7 S 481892 possibly damaging N -Contig28_chr12_43447144_43449156 1136 chr12 43448279 ENSCAFT00000004511 ENSCAFP00000004169 V 582 M 481893 benign N -Contig18_chr13_62535238_62535697 227 chr13 62535471 ENSCAFT00000004609 ENSCAFP00000004263 E 277 D 611755 benign N -Contig282_chr1_108960925_108962235 205 chr1 108961141 ENSCAFT00000004673 ENSCAFP00000004325 A 149 V 611817 benign N -Contig110_chr1_109196028_109197290 987 chr1 109197021 ENSCAFT00000004726 ENSCAFP00000004374 E 330 D 610047 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection -Contig89_chr11_69097905_69099099 568 chr11 69098443 ENSCAFT00000004799 ENSCAFP00000004445 E 1317 G U benign N -Contig118_chr14_46155051_46155557 173 chr14 46155218 ENSCAFT00000004933 ENSCAFP00000004572 S 110 L 482382 benign cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis -Contig54_chr12_51910786_51912716 682 chr12 51911460 ENSCAFT00000004993 ENSCAFP00000004630 H 2889 Y 474995 benign cfa03008=Ribosome biogenesis in eukaryotes -Contig95_chr10_67698730_67699605 267 chr10 67698997 ENSCAFT00000005126 ENSCAFP00000004751 P 45 L U benign N -Contig265_chr17_3177908_3178389 332 chr17 3178241 ENSCAFT00000005142 ENSCAFP00000004763 A 306 P 606804 benign N -Contig322_chr17_4977962_4979371 1122 chr17 4979079 
ENSCAFT00000005225 ENSCAFP00000004836 T 319 I 475647 possibly damaging N -Contig48_chr11_71453437_71456331 1725 chr11 71455160 ENSCAFT00000005323 ENSCAFP00000004927 A 226 V U benign N -Contig51_chr16_4789440_4790118 484 chr16 4789915 ENSCAFT00000005467 ENSCAFP00000005065 Q 318 H U benign N -Contig32_chr12_57224809_57225619 146 chr12 57224960 ENSCAFT00000005496 ENSCAFP00000005093 A 273 T 481925 benign N -Contig6_chr14_59310933_59312532 615 chr14 59311551 ENSCAFT00000005518 ENSCAFP00000005112 Y 304 H 492302 probably damaging cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion -Contig89_chr11_74391566_74395656 2856 chr11 74394408 ENSCAFT00000005653 ENSCAFP00000031395 R 450 H 403417 benign cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis -Contig15_chr1_109713951_109714808 645 chr1 109714594 ENSCAFT00000005746 ENSCAFP00000005319 R 783 K 476410 benign cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway -Contig47_chr17_11258085_11259619 360 chr17 11258455 ENSCAFT00000005749 ENSCAFP00000005322 V 778 L 610007 benign N -Contig1_chr19_4352123_4352541 311 chr19 4352427 ENSCAFT00000005832 ENSCAFP00000005401 H 7 Y 403584 benign cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis -Contig57_chr12_66915864_66916357 337 chr12 66916199 ENSCAFT00000005972 ENSCAFP00000005534 F 1242 L 475012 benign N -Contig36_chr17_16182220_16182772 282 chr17 16182494 ENSCAFT00000006025 ENSCAFP00000005583 V 13 I 
482980 possibly damaging N -Contig64_chr19_15052202_15053292 240 chr19 15052443 ENSCAFT00000006114 ENSCAFP00000005658 I 175 V 483829 benign N -Contig169_chr12_69415779_69417261 1136 chr12 69416908 ENSCAFT00000006157 ENSCAFP00000005701 D 85 N 475021 possibly damaging N -Contig200_chr18_15803806_15804082 169 chr18 15803976 ENSCAFT00000006219 ENSCAFP00000005760 A 66 V 483261 benign cfa04972=Pancreatic secretion.cfa04978=Mineral absorption -Contig6_chr18_15814044_15814404 97 chr18 15814150 ENSCAFT00000006219 ENSCAFP00000005760 A 413 S 483261 benign cfa04972=Pancreatic secretion.cfa04978=Mineral absorption -Contig104_chr1_110433641_110434230 183 chr1 110433810 ENSCAFT00000006272 ENSCAFP00000005811 A 315 T 484394 benign cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways -Contig52_chr18_17851226_17851871 284 chr18 17851509 ENSCAFT00000006453 ENSCAFP00000005976 T 311 M 475893 probably damaging N -Contig63_chr16_12167721_12168304 388 chr16 12168099 ENSCAFT00000006479 ENSCAFP00000006000 M 634 V U benign N -Contig101_chr20_4702659_4703738 441 chr20 4703092 ENSCAFT00000006507 ENSCAFP00000006027 G 635 D 484622 probably damaging cfa03030=DNA replication.cfa04110=Cell cycle -Contig53_chr19_21456428_21457881 408 chr19 21456840 ENSCAFT00000006669 ENSCAFP00000006174 R 247 L 476094 possibly damaging N -Contig58_chr18_19883250_19884312 250 chr18 19883498 ENSCAFT00000006689 ENSCAFP00000006194 * 503 Y 475897 benign N -Contig122_chr15_17034758_17035049 142 chr15 17034893 ENSCAFT00000006827 ENSCAFP00000006320 R 117 P U benign N -Contig131_chr18_20356930_20357227 113 chr18 20357041 ENSCAFT00000006891 ENSCAFP00000006378 V 55 L 610021 benign N -Contig117_chr22_5859195_5860740 654 chr22 5859850 ENSCAFT00000007130 ENSCAFP00000006603 S 139 N 485445 benign cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction -Contig91_chr17_23506302_23507213 
322 chr17 23506624 ENSCAFT00000007145 ENSCAFP00000006614 V 1644 I 607961 benign N -Contig3_chr21_16586556_16586852 105 chr21 16586661 ENSCAFT00000007244 ENSCAFP00000006709 C 33 Y 476781 possibly damaging N -Contig62_chr2_22645987_22646907 357 chr2 22646352 ENSCAFT00000007375 ENSCAFP00000006833 V 657 F 403767 probably damaging cfa04977=Vitamin digestion and absorption -Contig52_chr15_18032498_18034281 880 chr15 18033373 ENSCAFT00000007440 ENSCAFP00000006895 P 227 A 482516 benign N -Contig131_chr23_6679385_6679850 198 chr23 6679592 ENSCAFT00000007467 ENSCAFP00000006915 R 136 G 485576 possibly damaging N -Contig157_chr22_10584088_10586765 232 chr22 10584326 ENSCAFT00000007484 ENSCAFP00000006926 M 610 T 609336 benign N -Contig164_chr2_24336024_24340161 2420 chr2 24338436 ENSCAFT00000007527 ENSCAFP00000006969 S 824 C 607108 probably damaging N -Contig109_chr2_24557417_24558710 808 chr2 24558229 ENSCAFT00000007553 ENSCAFP00000006994 L 606 V 487123 benign cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency -Contig194_chr15_18573761_18574204 142 chr15 18573904 ENSCAFT00000007697 ENSCAFP00000007130 V 381 I 475382 benign N -Contig133_chr23_9924894_9925887 125 chr23 9925016 ENSCAFT00000007703 ENSCAFP00000007136 P 355 S 477019 benign cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer -Contig31_chr23_10199273_10203629 4073 chr23 10203350 ENSCAFT00000007747 ENSCAFP00000007179 A 1844 V U benign N -Contig21_chr23_10308212_10309269 513 chr23 10308732 ENSCAFT00000007774 ENSCAFP00000007206 K 72 R 477021 benign cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy -Contig211_chr1_114924893_114925515 171 chr1 114925067 ENSCAFT00000007776 ENSCAFP00000007208 P 
1988 A U benign N -Contig35_chr2_27160577_27161526 804 chr2 27161367 ENSCAFT00000007779 ENSCAFP00000007211 G 473 R 478007.478008 probably damaging cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome -Contig79_chr17_24285444_24286769 1263 chr17 24286694 ENSCAFT00000007859 ENSCAFP00000007285 S 209 T 483010 benign N -Contig74_chr23_10871047_10871362 70 chr23 10871116 ENSCAFT00000007951 ENSCAFP00000007365 I 474 V U benign N -Contig34_chr16_18928689_18932806 3409 chr16 18932072 ENSCAFT00000007959 ENSCAFP00000007370 A 3754 S 482810.611087 benign cfa00310=Lysine degradation -Contig52_chr21_24452521_24454405 725 chr21 24453245 ENSCAFT00000008012 ENSCAFP00000007418 M 289 T 485173 possibly damaging N -Contig261_chr1_115563599_115564561 560 chr1 115564156 ENSCAFT00000008063 ENSCAFP00000007465 A 63 T 484489 possibly damaging N -Contig62_chr19_41037398_41039465 159 chr19 41037564 ENSCAFT00000008142 ENSCAFP00000007541 C 744 Y 476128 possibly damaging N -Contig84_chr1_115960693_115962811 1467 chr1 115962120 ENSCAFT00000008198 ENSCAFP00000007593 W 61 R 612489 benign N -Contig135_chr23_14160194_14160717 270 chr23 14160468 ENSCAFT00000008413 ENSCAFP00000007796 V 298 I U benign N -Contig41_chr17_26203621_26205196 1407 chr17 26205028 ENSCAFT00000008540 ENSCAFP00000007913 H 172 R 483021 benign N -Contig260_chr1_116076701_116078120 746 chr1 116077446 ENSCAFT00000008586 ENSCAFP00000007956 T 2486 I 484499 benign N -Contig19_chr23_14811332_14815323 1987 chr23 14813327 ENSCAFT00000008588 ENSCAFP00000007958 S 690 L U unknown N -Contig180_chr2_35061773_35062172 166 chr2 35061941 ENSCAFT00000008673 ENSCAFP00000008039 T 920 M 478018 probably damaging N -Contig106_chr21_26153874_26154496 107 chr21 26153984 ENSCAFT00000008678 ENSCAFP00000008044 A 458 T 485188 benign N -Contig3_chr19_45625337_45630123 2563 chr19 45627887 ENSCAFT00000008728 ENSCAFP00000008094 V 1264 I U benign N -Contig51_chr22_48760401_48761638 636 chr22 48761047 ENSCAFT00000008769 
ENSCAFP00000008132 R 1071 K 485523 benign cfa02010=ABC transporters.cfa04976=Bile secretion -Contig10_chr15_21173640_21174011 212 chr15 21173839 ENSCAFT00000008831 ENSCAFP00000008192 V 191 I 475398 benign N -Contig6_chr24_14680423_14681438 782 chr24 14681208 ENSCAFT00000009074 ENSCAFP00000008417 H 562 R 485769 possibly damaging cfa04330=Notch signaling pathway -Contig60_chr9_4528464_4529207 262 chr9 4528727 ENSCAFT00000009114 ENSCAFP00000008453 C 24 F 483354 possibly damaging N -Contig54_chr15_29510545_29512205 400 chr15 29510955 ENSCAFT00000009614 ENSCAFP00000008928 H 190 R 475416 benign N -Contig46_chr25_5067588_5068089 39 chr25 5067627 ENSCAFT00000009698 ENSCAFP00000009003 S 17 N 486001 benign N -Contig126_chr25_5114359_5115799 643 chr25 5114996 ENSCAFT00000009710 ENSCAFP00000009013 R 1952 C 486002 possibly damaging N -Contig41_chr26_3455305_3455893 329 chr26 3455620 ENSCAFT00000010094 ENSCAFP00000009363 S 909 A 486223 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection -Contig55_chr26_3463883_3465235 1074 chr26 3464998 ENSCAFT00000010094 ENSCAFP00000009363 R 1273 S 486223 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection -Contig63_chr26_3467460_3468420 195 chr26 3467661 ENSCAFT00000010094 ENSCAFP00000009363 E 1542 Q 486223 benign cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection -Contig13_chr16_32259141_32259752 344 chr16 32259472 ENSCAFT00000010141 ENSCAFP00000009407 I 326 T 482857 benign cfa04360=Axon guidance -Contig59_chr21_32994329_32995926 1195 chr21 32995538 ENSCAFT00000010439 
ENSCAFP00000009680 H 230 R 610992 benign N -Contig39_chr20_24938452_24941620 1292 chr20 24939734 ENSCAFT00000010496 ENSCAFP00000009730 S 28 P 415126 benign cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma -Contig2_chr18_28546360_28546760 277 chr18 28546640 ENSCAFT00000010516 ENSCAFP00000009748 P 471 S U benign N -Contig23_chr20_25560598_25562858 928 chr20 25561520 ENSCAFT00000010531 ENSCAFP00000009762 T 749 I 484693 benign N -Contig209_chr18_28672330_28672791 376 chr18 28672689 ENSCAFT00000010559 ENSCAFP00000009790 A 33 D 483405 unknown N -Contig261_chr18_28694652_28696968 1808 chr18 28696427 ENSCAFT00000010559 ENSCAFP00000009790 P 1443 L 483405 possibly damaging N -Contig30_chr25_12008255_12009009 151 chr25 12008417 ENSCAFT00000010593 ENSCAFP00000009822 Q 151 H U benign N -Contig46_chr29_3065854_3067420 1265 chr29 3067078 ENSCAFT00000010616 ENSCAFP00000009842 V 3253 A 474176 benign cfa03450=Non-homologous end-joining.cfa04110=Cell cycle -Contig59_chr28_3755477_3757019 935 chr28 3756419 ENSCAFT00000010630 ENSCAFP00000009853 R 923 Q 486770 possibly damaging N -Contig90_chr29_6393993_6395503 951 chr29 6394948 ENSCAFT00000010829 ENSCAFP00000010033 Y 257 C 486944 benign N -Contig42_chr16_39015800_39016389 319 chr16 39016119 ENSCAFT00000010865 ENSCAFP00000010068 D 71 N U possibly damaging N -Contig95_chr21_34533214_34535079 1133 chr21 34534321 ENSCAFT00000010931 ENSCAFP00000010131 E 118 G 485368 benign N -Contig82_chr21_34524815_34525170 247 chr21 34525072 ENSCAFT00000010931 ENSCAFP00000010131 Q 499 R 485368 benign N -Contig32_chr24_22727492_22727986 147 chr24 22727648 ENSCAFT00000010977 ENSCAFP00000010173 P 278 L U possibly damaging N -Contig45_chr16_42405571_42406148 269 chr16 42405837 ENSCAFT00000010988 ENSCAFP00000010184 H 406 R 482891 benign cfa04145=Phagosome -Contig66_chr15_43321121_43321872 642 chr15 43321764 ENSCAFT00000011187 ENSCAFP00000010364 F 543 L 475441 benign N -Contig184_chr27_5103641_5104991 275 
chr27 5103979 ENSCAFT00000011380 ENSCAFP00000010541 V 864 A U benign N -Contig88_chr17_39320200_39320765 204 chr17 39320404 ENSCAFT00000011397 ENSCAFP00000010558 S 1911 N 475750 benign cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection -Contig8_chr16_47195242_47195504 193 chr16 47195429 ENSCAFT00000011721 ENSCAFP00000010862 S 4369 W 475621 benign N -Contig84_chr27_5882441_5882771 145 chr27 5882579 ENSCAFT00000011730 ENSCAFP00000010871 C 289 S 486534 benign N -Contig42_chr24_25316755_25317362 320 chr24 25317091 ENSCAFT00000011771 ENSCAFP00000010910 G 22 S 477193 benign N -Contig45_chr24_25318544_25319490 734 chr24 25319299 ENSCAFT00000011771 ENSCAFP00000010910 V 187 A 477193 benign N -Contig31_chr24_25434125_25435133 853 chr24 25434975 ENSCAFT00000011789 ENSCAFP00000010928 S 91 I 609978 benign N -Contig20_chr3_10579133_10580085 600 chr3 10579729 ENSCAFT00000011968 ENSCAFP00000011099 K 165 E 488881 benign cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome -Contig45_chr2_54585564_54588038 1047 chr2 54586611 ENSCAFT00000012081 ENSCAFP00000011198 T 969 M 478082 benign cfa04621=NOD-like receptor signaling pathway -Contig156_chr1_122375741_122376035 168 chr1 122375904 ENSCAFT00000012133 ENSCAFP00000011248 R 628 K 611998 benign N -Contig153_chr1_124036982_124040108 1588 chr1 124038585 ENSCAFT00000012159 ENSCAFP00000011272 A 887 T 484609 benign N -Contig32_chr24_26900375_26900913 394 chr24 26900761 ENSCAFT00000012254 ENSCAFP00000011358 H 51 Y U benign N -Contig103_chr16_48829082_48829675 123 chr16 48829205 ENSCAFT00000012381 ENSCAFP00000011471 E 369 G 475632 possibly damaging N -Contig25_chr18_41490135_41493501 534 chr18 41490665 ENSCAFT00000012414 ENSCAFP00000011503 R 703 C 483489 probably damaging cfa04520=Adherens junction.cfa04670=Leukocyte transendothelial migration -Contig69_chr16_49314879_49317228 1810 chr16 49316689 ENSCAFT00000012456 
ENSCAFP00000011541 P 431 L 475636 probably damaging cfa00565=Ether lipid metabolism -Contig71_chr17_42734055_42736474 2240 chr17 42736298 ENSCAFT00000012478 ENSCAFP00000011561 R 307 Q 483083 benign cfa00830=Retinol metabolism -Contig17_chr17_43378842_43379885 305 chr17 43379148 ENSCAFT00000012676 ENSCAFP00000011740 T 196 M U probably damaging N -Contig195_chr27_7047911_7049009 555 chr27 7048468 ENSCAFT00000012942 ENSCAFP00000011978 R 881 L 477608 benign N -Contig112_chr30_4254316_4256576 1478 chr30 4255785 ENSCAFT00000012974 ENSCAFP00000012007 V 2939 I U benign N -Contig43_chr20_39124486_39124798 114 chr20 39124607 ENSCAFT00000013097 ENSCAFP00000012118 G 325 R 607274 possibly damaging N -Contig96_chr16_55849292_55849592 194 chr16 55849494 ENSCAFT00000013360 ENSCAFP00000012363 A 41 S 482932 benign cfa04060=Cytokine-cytokine receptor interaction.cfa04150=mTOR signaling pathway.cfa04510=Focal adhesion.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05212=Pancreatic cancer.cfa05219=Bladder cancer -Contig91_chr17_51684551_51689453 4154 chr17 51688687 ENSCAFT00000013395 ENSCAFP00000012395 P 306 L 475784 benign N -Contig192_chr26_12794366_12794712 143 chr26 12794506 ENSCAFT00000014076 ENSCAFP00000013021 V 2478 I 477486 benign N -Contig191_chr31_30109152_30109760 212 chr31 30109363 ENSCAFT00000014113 ENSCAFP00000013055 A 1813 T 487735 probably damaging N -Contig116_chr24_29683980_29684819 101 chr24 29684079 ENSCAFT00000014115 ENSCAFP00000013057 R 836 C 485868 probably damaging N -Contig8_chr32_9413601_9414435 74 chr32 9413675 ENSCAFT00000014257 ENSCAFP00000013183 N 236 K 478452 probably damaging cfa00270=Cysteine and methionine metabolism -Contig90_chr21_43253791_43254774 189 chr21 43253974 ENSCAFT00000014325 ENSCAFP00000013248 I 758 V U benign N -Contig76_chr24_30292767_30294101 552 chr24 30293321 ENSCAFT00000014346 ENSCAFP00000013267 A 349 T U benign N -Contig21_chr25_37121451_37122072 177 chr25 37121616 ENSCAFT00000014616 ENSCAFP00000013518 V 157 L 486118 
benign N -Contig15_chr36_6357141_6362626 5226 chr36 6362346 ENSCAFT00000014702 ENSCAFP00000013598 N 138 K 607626 possibly damaging N -Contig64_chr17_54734453_54734993 109 chr17 54734552 ENSCAFT00000014707 ENSCAFP00000013603 S 302 L 483124 benign N -Contig91_chr18_46134014_46136042 330 chr18 46134347 ENSCAFT00000014736 ENSCAFP00000013630 A 214 S 483635 benign cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system -Contig111_chr31_31237314_31238628 920 chr31 31238220 ENSCAFT00000014822 ENSCAFP00000013714 S 143 C 478408 benign N -Contig77_chr38_3502296_3503058 349 chr38 3502639 ENSCAFT00000015260 ENSCAFP00000014122 K 666 E 478932 benign N -Contig59_chr38_3998294_3999004 369 chr38 3998672 ENSCAFT00000015347 ENSCAFP00000014201 V 791 I U benign N -Contig123_chr31_34367825_34368648 664 chr31 34368468 ENSCAFT00000015534 ENSCAFP00000014373 H 204 Q U possibly damaging N -Contig43_chr30_11874641_11875130 198 chr30 11874850 ENSCAFT00000015654 ENSCAFP00000014488 R 3422 C U benign N -Contig9_chr20_40741488_40743247 1027 chr20 40742525 ENSCAFT00000015816 ENSCAFP00000014638 M 183 V 484744 benign N -Contig137_chr5_7048977_7051042 863 chr5 7049840 ENSCAFT00000015844 ENSCAFP00000014662 A 311 V 479391 benign N -Contig9_chr28_17675067_17680985 1564 chr28 17676618 ENSCAFT00000015971 ENSCAFP00000014772 R 515 P 477805 unknown N -Contig126_chr30_12286682_12287475 407 chr30 12287101 ENSCAFT00000016062 ENSCAFP00000014854 V 450 I 487517 benign cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways -Contig127_chr30_12287497_12288447 608 chr30 12288095 ENSCAFT00000016062 ENSCAFP00000014854 T 495 M 487517 benign cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways -Contig13_chr38_5058391_5058630 66 chr38 5058458 ENSCAFT00000016099 ENSCAFP00000014887 F 412 L 478943 benign N -Contig169_chr35_19985467_19986000 455 chr35 
19985921 ENSCAFT00000016165 ENSCAFP00000014950 T 175 I 478733 benign N -Contig2_chr35_21794536_21795092 291 chr35 21794865 ENSCAFT00000016208 ENSCAFP00000014992 V 84 A 488238 benign cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways -Contig141_chr26_19278751_19279229 364 chr26 19279128 ENSCAFT00000016284 ENSCAFP00000015064 N 29 S 404011 benign cfa00564=Glycerophospholipid metabolism.cfa00565=Ether lipid metabolism.cfa00590=Arachidonic acid metabolism.cfa00591=Linoleic acid metabolism.cfa00592=alpha-Linolenic acid metabolism.cfa01100=Metabolic pathways.cfa04010=MAPK signaling pathway.cfa04270=Vascular smooth muscle contraction.cfa04370=VEGF signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04724=Glutamatergic synapse.cfa04730=Long-term depression.cfa04912=GnRH signaling pathway.cfa04972=Pancreatic secretion.cfa04975=Fat digestion and absorption.cfa05145=Toxoplasmosis -Contig179_chr3_40781459_40782026 285 chr3 40781763 ENSCAFT00000016410 ENSCAFP00000015182 D 1174 N 488699 benign N -Contig237_chr21_53631024_53632458 203 chr21 53631227 ENSCAFT00000016459 ENSCAFP00000015227 C 47 W 403799 probably damaging cfa04664=Fc epsilon RI signaling pathway.cfa05310=Asthma -Contig186_chr2_71203100_71204111 202 chr2 71203303 ENSCAFT00000016485 ENSCAFP00000015250 S 188 T 478144 benign cfa00330=Arginine and proline metabolism.cfa01100=Metabolic pathways -Contig1_chr28_18779291_18780149 325 chr28 18779619 ENSCAFT00000016578 ENSCAFP00000015340 N 245 Y U probably damaging N -Contig166_chr4_77425871_77426835 797 chr4 77426667 ENSCAFT00000016670 ENSCAFP00000015429 D 115 G 479370 benign cfa00970=Aminoacyl-tRNA biosynthesis -Contig35_chr24_36806524_36807086 367 chr24 36806891 ENSCAFT00000016727 ENSCAFP00000015478 F 345 L 485910 benign N -Contig45_chr37_8610877_8611425 194 chr37 8611078 ENSCAFT00000016761 ENSCAFP00000015511 D 2849 N 488452 possibly damaging N -Contig39_chr28_19446540_19447838 1068 chr28 19447566 ENSCAFT00000016791 
ENSCAFP00000015537 A 1596 E U benign N -Contig161_chr18_51013230_51015381 1494 chr18 51014735 ENSCAFT00000016827 ENSCAFP00000015571 L 977 V 475999 benign N -Contig25_chr28_19619108_19621267 1728 chr28 19620832 ENSCAFT00000016848 ENSCAFP00000034237 I 108 V 609723 benign N -Contig33_chr20_42063173_42064259 623 chr20 42063789 ENSCAFT00000017070 ENSCAFP00000015794 V 179 M U probably damaging N -Contig39_chr38_14681397_14682234 384 chr38 14681781 ENSCAFT00000017072 ENSCAFP00000015796 H 282 N 488593 unknown N -Contig6_chr32_27303975_27304541 425 chr32 27304407 ENSCAFT00000017178 ENSCAFP00000015896 S 354 T 610098 benign N -Contig173_chr38_17709765_17711029 179 chr38 17709941 ENSCAFT00000017240 ENSCAFP00000015955 G 464 R U benign N -Contig52_chr32_27452924_27453332 91 chr32 27452999 ENSCAFT00000017249 ENSCAFP00000015964 A 22 S U benign N -Contig319_chr34_14684259_14684663 353 chr34 14684613 ENSCAFT00000017314 ENSCAFP00000016025 R 5 Q 478632 benign N -Contig32_chr2_72269353_72269814 349 chr2 72269708 ENSCAFT00000017327 ENSCAFP00000016037 P 853 L 487317 possibly damaging N -Contig206_chr9_18720001_18720613 155 chr9 18720160 ENSCAFT00000017373 ENSCAFP00000016082 D 1621 E 480456 benign cfa02010=ABC transporters -Contig35_chr37_10562149_10562621 74 chr37 10562222 ENSCAFT00000017444 ENSCAFP00000016153 I 975 V 478858 benign cfa04727=GABAergic synapse -Contig1_chr30_12655575_12656916 370 chr30 12655947 ENSCAFT00000017777 ENSCAFP00000016457 L 639 M 608886 probably damaging N -Contig63_chr27_23738716_23739879 1131 chr27 23739850 ENSCAFT00000017892 ENSCAFP00000016566 P 642 L 486627 benign N -Contig44_chr28_28123120_28124627 1348 chr28 28124495 ENSCAFT00000017967 ENSCAFP00000016639 V 261 A 477827 benign N -Contig23_chrX_6416128_6417014 455 chrX 6416585 ENSCAFT00000018017 ENSCAFP00000016684 H 111 R 491733 possibly damaging N -Contig31_chr7_8282189_8286932 3631 chr7 8285875 ENSCAFT00000018057 ENSCAFP00000016724 L 655 P 490260 benign N -Contig318_chr6_8706066_8706350 76 chr6 8706142 
ENSCAFT00000018106 ENSCAFP00000016769 K 318 N 607700 possibly damaging cfa04062=Chemokine signaling pathway.cfa04145=Phagosome.cfa04380=Osteoclast differentiation.cfa04666=Fc gamma R-mediated phagocytosis.cfa04670=Leukocyte transendothelial migration.cfa05140=Leishmaniasis -Contig36_chr32_33046881_33048369 1118 chr32 33047990 ENSCAFT00000018307 ENSCAFP00000016954 E 555 A 403657 benign cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04060=Cytokine-cytokine receptor interaction.cfa04144=Endocytosis.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04810=Regulation of actin cytoskeleton.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05212=Pancreatic cancer.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05218=Melanoma.cfa05219=Bladder cancer.cfa05223=Non-small cell lung cancer -Contig32_chr5_14476595_14477214 347 chr5 14476948 ENSCAFT00000018571 ENSCAFP00000017201 V 209 A 610296 benign N -Contig88_chr34_19031138_19031937 343 chr34 19031477 ENSCAFT00000018684 ENSCAFP00000017309 K 670 R 478645 benign cfa00280=Valine, leucine and isoleucine degradation.cfa01100=Metabolic pathways -Contig188_chr25_47927372_47928085 557 chr25 47927941 ENSCAFT00000018758 ENSCAFP00000017379 K 228 R 486167 benign cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system -Contig36_chr37_12924359_12924740 86 chr37 12924449 ENSCAFT00000018786 ENSCAFP00000017406 D 187 Y 608849 probably damaging cfa00280=Valine, leucine and isoleucine degradation.cfa00350=Tyrosine metabolism.cfa00380=Tryptophan metabolism.cfa00750=Vitamin B6 metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways -Contig3_chr34_19471626_19472377 337 chr34 19471956 ENSCAFT00000018788 ENSCAFP00000017408 R 239 Q 488096 possibly damaging N -Contig80_chr4_11155760_11156827 952 chr4 11156735 ENSCAFT00000018796 ENSCAFP00000017416 
S 661 N 479204 benign cfa00564=Glycerophospholipid metabolism.cfa04146=Peroxisome -Contig56_chr8_7093747_7095987 683 chr8 7094428 ENSCAFT00000018813 ENSCAFP00000017431 P 126 R 490620 unknown N -Contig82_chr8_7111986_7114065 1351 chr8 7113329 ENSCAFT00000018871 ENSCAFP00000017488 R 608 H 480255 probably damaging cfa00010=Glycolysis / Gluconeogenesis.cfa00020=Citrate cycle (TCA cycle).cfa00620=Pyruvate metabolism.cfa01100=Metabolic pathways.cfa03320=PPAR signaling pathway.cfa04910=Insulin signaling pathway.cfa04920=Adipocytokine signaling pathway.cfa04964=Proximal tubule bicarbonate reclamation -Contig172_chr33_28585454_28586084 228 chr33 28585687 ENSCAFT00000018884 ENSCAFP00000017500 R 36 K 478584 benign N -Contig113_chr5_16682954_16684491 688 chr5 16683641 ENSCAFT00000018997 ENSCAFP00000017606 F 41 L 489360 benign N -Contig36_chr28_31449413_31452160 2111 chr28 31451506 ENSCAFT00000019041 ENSCAFP00000017650 P 252 H 477834 benign cfa04144=Endocytosis -Contig80_chr3_55628026_55628800 392 chr3 55628403 ENSCAFT00000019070 ENSCAFP00000017677 R 805 K 403913 benign cfa00480=Glutathione metabolism.cfa01100=Metabolic pathways.cfa04614=Renin-angiotensin system.cfa04640=Hematopoietic cell lineage -Contig99_chr7_11816365_11819255 806 chr7 11817201 ENSCAFT00000019101 ENSCAFP00000017707 C 305 G 490276 benign N -Contig114_chr4_12744102_12745318 148 chr4 12744256 ENSCAFT00000019279 ENSCAFP00000017880 I 700 V U benign N -Contig82_chr7_13056757_13058281 974 chr7 13057742 ENSCAFT00000019316 ENSCAFP00000017915 S 283 N 609933 benign cfa00564=Glycerophospholipid metabolism -Contig280_chr25_51367477_51367885 70 chr25 51367542 ENSCAFT00000019610 ENSCAFP00000018191 S 97 L U benign N -Contig35_chr20_43508791_43509352 460 chr20 43509254 ENSCAFT00000019627 ENSCAFP00000018204 V 77 A 608455 benign cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease 
-Contig36_chr20_43509362_43510980 1484 chr20 43510860 ENSCAFT00000019627 ENSCAFP00000018204 D 181 N 608455 benign cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease -Contig59_chr25_51807653_51809044 1064 chr25 51808739 ENSCAFT00000019760 ENSCAFP00000018330 R 235 K U benign N -Contig96_chr36_17712997_17714068 556 chr36 17713559 ENSCAFT00000019807 ENSCAFP00000018374 T 423 I 478789 benign N -Contig163_chr28_34927368_34929275 1128 chr28 34928486 ENSCAFT00000019866 ENSCAFP00000018425 A 2659 T 477850 benign N -Contig74_chr33_31230250_31230874 246 chr33 31230493 ENSCAFT00000019938 ENSCAFP00000018492 G 113 S 488016 probably damaging N -Contig130_chr7_15553315_15558308 3186 chr7 15556497 ENSCAFT00000020009 ENSCAFP00000018561 K 1513 N U benign N -Contig160_chr2_76816412_76817166 354 chr2 76816779 ENSCAFT00000020143 ENSCAFP00000018683 I 190 V 478173 benign N -Contig219_chr33_31871568_31871771 81 chr33 31871646 ENSCAFT00000020195 ENSCAFP00000018733 N 346 H U probably damaging N -Contig254_chr24_50001599_50001992 151 chr24 50001767 ENSCAFT00000020266 ENSCAFP00000018803 R 239 Q U benign N -Contig40_chr37_15283702_15285945 1908 chr37 15285621 ENSCAFT00000020408 ENSCAFP00000018937 A 809 G U unknown N -Contig59_chr20_43702094_43703358 450 chr20 43702540 ENSCAFT00000020438 ENSCAFP00000018965 S 217 A U benign N -Contig75_chr3_57465650_57466327 377 chr3 57466017 ENSCAFT00000020863 ENSCAFP00000019371 L 205 F 609716 probably damaging N -Contig155_chr2_79195879_79199423 2014 chr2 79197892 ENSCAFT00000021154 ENSCAFP00000019645 G 549 S U benign N -Contig155_chr2_79195879_79199423 3136 chr2 79199014 ENSCAFT00000021154 ENSCAFP00000019645 R 923 C U probably damaging N -Contig59_chr5_19784971_19787384 1310 chr5 19786293 ENSCAFT00000021222 ENSCAFP00000019707 V 171 I 479428.489393 benign cfa03320=PPAR signaling pathway -Contig41_chr30_14304605_14305465 206 chr30 
14304816 ENSCAFT00000021612 ENSCAFP00000020069 A 157 G U benign N -Contig47_chr20_45043804_45044476 317 chr20 45044117 ENSCAFT00000021659 ENSCAFP00000020114 V 281 I 609323 benign N -Contig46_chr4_22849549_22849829 123 chr4 22849673 ENSCAFT00000021752 ENSCAFP00000020204 V 646 M U probably damaging N -Contig141_chr7_22360980_22361690 242 chr7 22361233 ENSCAFT00000021777 ENSCAFP00000020227 K 1862 R U unknown N -Contig59_chr30_14758622_14760653 1186 chr30 14759817 ENSCAFT00000021792 ENSCAFP00000020241 S 284 R 609256 benign N -Contig57_chr27_39696388_39698349 1026 chr27 39697428 ENSCAFT00000021846 ENSCAFP00000020293 Q 588 R 477699 benign cfa04610=Complement and coagulation cascades -Contig83_chr27_40151814_40153141 738 chr27 40152551 ENSCAFT00000022064 ENSCAFP00000020490 S 191 R 477702 benign N -Contig105_chr6_11901733_11904968 406 chr6 11902145 ENSCAFT00000022289 ENSCAFP00000020701 Y 55 H 479732 probably damaging cfa04621=NOD-like receptor signaling pathway -Contig43_chr36_25298890_25299602 235 chr36 25299132 ENSCAFT00000022319 ENSCAFP00000020728 E 11731 K 610299.610339 unknown N -Contig3_chr36_25193150_25202641 2802 chr36 25195983 ENSCAFT00000022319 ENSCAFP00000020728 I 30137 V 610299.610339 benign N -Contig585_chr3_61201332_61201904 139 chr3 61201468 ENSCAFT00000022529 ENSCAFP00000020918 L 97 V 479067 benign cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway -Contig1_chr20_46714929_46715937 434 chr20 46715327 ENSCAFT00000022571 ENSCAFP00000020958 A 18 P 484804 unknown N -Contig7_chr8_29376780_29378260 158 chr8 29376937 ENSCAFT00000022576 ENSCAFP00000020962 T 852 A 490678 benign N -Contig74_chr8_29656170_29657212 595 chr8 29656776 ENSCAFT00000022697 ENSCAFP00000021080 E 974 K 490682 possibly damaging cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04062=Chemokine signaling pathway.cfa04320=Dorso-ventral axis formation.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04630=Jak-STAT signaling 
pathway.cfa04650=Natural killer cell mediated cytotoxicity.cfa04660=T cell receptor signaling pathway.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04722=Neurotrophin signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa04912=GnRH signaling pathway.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05220=Chronic myeloid leukemia.cfa05221=Acute myeloid leukemia.cfa05223=Non-small cell lung cancer -Contig45_chr4_25273541_25274402 608 chr4 25274121 ENSCAFT00000022760 ENSCAFP00000021140 S 30 F 479239 probably damaging cfa04360=Axon guidance -Contig96_chr37_26111249_26111450 108 chr37 26111364 ENSCAFT00000022884 ENSCAFP00000021256 K 260 R 478902 benign cfa03450=Non-homologous end-joining -Contig196_chr3_62434637_62435063 203 chr3 62434823 ENSCAFT00000022915 ENSCAFP00000021284 L 174 P 488785 benign N -Contig15_chr6_12238116_12239737 1287 chr6 12239420 ENSCAFT00000022961 ENSCAFP00000021328 E 165 K 479735 benign N -Contig175_chr5_27267391_27267870 57 chr5 27267451 ENSCAFT00000023032 ENSCAFP00000021395 N 1094 S 479450 benign cfa04110=Cell cycle.cfa04115=p53 signaling pathway.cfa04210=Apoptosis.cfa05166=HTLV-I infection -Contig110_chr20_47192181_47193618 93 chr20 47192262 ENSCAFT00000023054 ENSCAFP00000021407 A 308 P 484814 probably damaging N -Contig9_chr4_26730063_26730585 245 chr4 26730316 ENSCAFT00000023087 ENSCAFP00000021437 E 153 D 489044 benign N -Contig1_chr34_35420831_35421658 73 chr34 35420908 ENSCAFT00000023111 ENSCAFP00000021457 V 251 I 488144 benign N -Contig199_chr2_79696091_79697603 751 chr2 79696840 ENSCAFT00000023253 ENSCAFP00000021593 D 54 A U possibly damaging N -Contig146_chrX_38946913_38947473 307 chrX 38947225 ENSCAFT00000023268 ENSCAFP00000021608 L 160 V 612457 possibly damaging N -Contig63_chr9_23532151_23533554 1297 chr9 23533421 ENSCAFT00000023438 ENSCAFP00000021767 
Q 279 R 490958 benign N -Contig89_chr5_32060784_32061151 293 chr5 32061079 ENSCAFT00000023913 ENSCAFP00000022199 W 106 * 489430 probably damaging N -Contig15_chr3_65640843_65642155 1100 chr3 65641942 ENSCAFT00000023933 ENSCAFP00000022218 V 383 A 479080 benign N -Contig49_chr26_33571748_33572620 689 chr26 33572452 ENSCAFT00000024062 ENSCAFP00000022339 R 478 W 486440 benign N -Contig96_chr20_48055741_48057197 524 chr20 48056259 ENSCAFT00000024100 ENSCAFP00000022374 R 172 Q U benign N -Contig104_chr20_48062263_48062546 210 chr20 48062492 ENSCAFT00000024100 ENSCAFP00000022374 V 775 G U probably damaging N -Contig33_chr37_28794567_28796956 2144 chr37 28796718 ENSCAFT00000024137 ENSCAFP00000022408 E 279 Q 488536 benign N -Contig24_chr7_32005266_32005660 212 chr7 32005479 ENSCAFT00000024154 ENSCAFP00000022424 T 92 M U probably damaging N -Contig174_chr18_56896461_56897594 274 chr18 56896734 ENSCAFT00000024637 ENSCAFP00000022858 V 157 L 483779 benign cfa04130=SNARE interactions in vesicular transport -Contig55_chr20_48811642_48812027 299 chr20 48811941 ENSCAFT00000024761 ENSCAFP00000022970 H 993 R 476678 benign N -Contig220_chr18_56925351_56927006 920 chr18 56926246 ENSCAFT00000024787 ENSCAFP00000022995 P 420 Q 476051 possibly damaging cfa03022=Basal transcription factors.cfa05168=Herpes simplex infection -Contig12_chr8_39044824_39045409 359 chr8 39045181 ENSCAFT00000024804 ENSCAFP00000023011 I 280 T 612894 possibly damaging N -Contig23_chr3_72567678_72570858 1313 chr3 72568976 ENSCAFT00000024846 ENSCAFP00000023051 L 298 P 488826 benign N -Contig190_chr7_35896301_35896811 232 chr7 35896528 ENSCAFT00000024892 ENSCAFP00000023095 R 3 L 480092 unknown cfa00020=Citrate cycle (TCA cycle).cfa01100=Metabolic pathways.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma -Contig119_chr20_49114009_49114654 266 chr20 49114270 ENSCAFT00000024934 ENSCAFP00000023135 F 339 L 484849 benign N -Contig47_chr8_41487304_41487682 210 chr8 41487515 ENSCAFT00000025088 ENSCAFP00000023286 S 
1743 L 490729 possibly damaging N -Contig67_chr2_84099157_84100880 345 chr2 84099493 ENSCAFT00000025109 ENSCAFP00000023307 I 60 L U benign N -Contig33_chr20_49727730_49730958 2192 chr20 49729935 ENSCAFT00000025308 ENSCAFP00000023495 T 448 R U probably damaging N -Contig33_chr20_49727730_49730958 2907 chr20 49730606 ENSCAFT00000025308 ENSCAFP00000023495 W 493 L U benign N -Contig93_chr8_42181027_42183022 694 chr8 42181716 ENSCAFT00000025462 ENSCAFP00000023641 L 782 P U benign N -Contig131_chrX_44937490_44940040 950 chrX 44938456 ENSCAFT00000025663 ENSCAFP00000023835 V 120 M 491894 benign N -Contig100_chrX_44915404_44918232 1832 chrX 44917224 ENSCAFT00000025663 ENSCAFP00000023835 R 1212 Q 491894 benign N -Contig100_chrX_44915404_44918232 920 chrX 44916331 ENSCAFT00000025663 ENSCAFP00000023835 L 1377 V 491894 benign N -Contig123_chr9_26132942_26133532 310 chr9 26133253 ENSCAFT00000025948 ENSCAFP00000024090 I 232 V 491022 benign N -Contig34_chr6_17772839_17773548 489 chr6 17773329 ENSCAFT00000026008 ENSCAFP00000024146 E 377 Q U benign N -Contig382_chr7_43383655_43383893 190 chr7 43383854 ENSCAFT00000026053 ENSCAFP00000024188 R 123 C U possibly damaging N -Contig163_chr2_87404548_87404792 132 chr2 87404673 ENSCAFT00000026251 ENSCAFP00000024378 D 239 N U benign N -Contig15_chr3_91850893_91851323 75 chr3 91850967 ENSCAFT00000026343 ENSCAFP00000024465 S 722 N 595148 benign cfa04360=Axon guidance -Contig141_chr7_44385686_44386047 166 chr7 44385857 ENSCAFT00000026393 ENSCAFP00000024510 L 166 P 490412 benign cfa04810=Regulation of actin cytoskeleton -Contig161_chr2_87840986_87841705 540 chr2 87841516 ENSCAFT00000026485 ENSCAFP00000024598 F 678 C 478233 probably damaging cfa03018=RNA degradation -Contig177_chr9_27497479_27498192 354 chr9 27497831 ENSCAFT00000026613 ENSCAFP00000024719 A 175 V 491046 possibly damaging N -Contig162_chr6_20156115_20157725 81 chr6 20156197 ENSCAFT00000026687 ENSCAFP00000024793 T 702 M 489923.489924.607168 benign N -Contig8_chr9_28287278_28288276 
469 chr9 28287755 ENSCAFT00000026707 ENSCAFP00000024813 A 75 P 491060 benign N -Contig166_chr7_45276673_45277595 235 chr7 45276916 ENSCAFT00000026881 ENSCAFP00000024984 V 525 I 490428 benign N -Contig16_chr8_51223078_51223662 481 chr8 51223563 ENSCAFT00000026967 ENSCAFP00000025070 R 869 Q 490790 benign N -Contig65_chr9_29792446_29793465 893 chr9 29793341 ENSCAFT00000027073 ENSCAFP00000025173 S 81 A 491082 benign N -Contig175_chr6_30926774_30927470 446 chr6 30927229 ENSCAFT00000027269 ENSCAFP00000025361 S 663 T 403453 benign cfa02010=ABC transporters.cfa04977=Vitamin digestion and absorption -Contig45_chr30_33024389_33025619 471 chr30 33024857 ENSCAFT00000027320 ENSCAFP00000025407 G 986 A 487608 benign N -Contig60_chr20_53087461_53088013 184 chr20 53087649 ENSCAFT00000027519 ENSCAFP00000025591 S 556 L 611163 benign N -Contig98_chr5_37073086_37073674 378 chr5 37073467 ENSCAFT00000027596 ENSCAFP00000025664 V 38 M 479499 probably damaging cfa04130=SNARE interactions in vesicular transport -Contig64_chr9_36235086_36235751 475 chr9 36235563 ENSCAFT00000027673 ENSCAFP00000025737 D 260 E 491111 benign cfa04970=Salivary secretion -Contig72_chr30_35330469_35330831 236 chr30 35330709 ENSCAFT00000027712 ENSCAFP00000025770 G 386 C 478353 probably damaging cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy -Contig12_chr8_66066327_66066629 89 chr8 66066402 ENSCAFT00000027927 ENSCAFP00000025970 K 158 R 490836 benign N -Contig212_chr8_66173086_66174259 622 chr8 66173712 ENSCAFT00000027950 ENSCAFP00000025993 K 114 Q 480421 benign N -Contig176_chr7_48083671_48084458 311 chr7 48083983 ENSCAFT00000027972 ENSCAFP00000026015 R 128 H 480148 probably damaging N -Contig3_chr4_58820541_58821952 265 chr4 58820806 ENSCAFT00000027979 ENSCAFP00000026022 A 31 T 489166 benign N -Contig24_chr7_48238665_48239174 
383 chr7 48239049 ENSCAFT00000028007 ENSCAFP00000026049 T 227 M 480151 probably damaging N -Contig25_chr6_26340448_26341519 657 chr6 26341104 ENSCAFT00000028115 ENSCAFP00000026155 S 128 L 479811 possibly damaging N -Contig212_chr5_38871122_38871621 302 chr5 38871429 ENSCAFT00000028231 ENSCAFP00000026253 P 2265 L 489507 benign N -Contig147_chr6_27310627_27310983 100 chr6 27310719 ENSCAFT00000028327 ENSCAFP00000026344 V 154 A U benign N -Contig160_chr6_27318582_27318861 67 chr6 27318647 ENSCAFT00000028327 ENSCAFP00000026344 K 325 R U benign N -Contig18_chr4_61023435_61026038 385 chr4 61023825 ENSCAFT00000028363 ENSCAFP00000026377 P 4110 L 479323 benign N -Contig162_chr30_40685605_40687049 343 chr30 40685956 ENSCAFT00000028463 ENSCAFP00000026472 A 416 P 487646 benign N -Contig68_chr20_54017481_54018354 221 chr20 54017705 ENSCAFT00000028500 ENSCAFP00000026509 W 539 R U benign N -Contig50_chr7_59076761_59079381 2353 chr7 59079104 ENSCAFT00000028551 ENSCAFP00000026557 V 1487 I 490492 benign N -Contig51_chr7_59079274_59084588 2611 chr7 59081905 ENSCAFT00000028551 ENSCAFP00000026557 A 575 V 490492 benign N -Contig3_chr20_54855789_54856135 37 chr20 54855833 ENSCAFT00000028813 ENSCAFP00000026796 F 6015 S U unknown N -Contig157_chr5_43472186_43472528 168 chr5 43472353 ENSCAFT00000028826 ENSCAFP00000026807 R 355 Q 489526 benign cfa00010=Glycolysis / Gluconeogenesis.cfa00340=Histidine metabolism.cfa00350=Tyrosine metabolism.cfa00360=Phenylalanine metabolism.cfa00410=beta-Alanine metabolism.cfa00980=Metabolism of xenobiotics by cytochrome P450.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways -Contig80_chr20_55281094_55281971 129 chr20 55281228 ENSCAFT00000028936 ENSCAFP00000026914 T 931 A U benign N -Contig214_chr8_74493164_74493474 188 chr8 74493346 ENSCAFT00000029054 ENSCAFP00000027017 R 94 C U probably damaging N -Contig259_chr20_55571618_55572503 186 chr20 55571803 ENSCAFT00000029100 ENSCAFP00000027059 K 526 Q 485001 benign N 
-Contig180_chr9_41668066_41668716 357 chr9 41668451 ENSCAFT00000029122 ENSCAFP00000027081 E 990 D 491145 benign cfa03410=Base excision repair -Contig61_chr4_63087183_63089623 491 chr4 63087672 ENSCAFT00000029130 ENSCAFP00000027089 A 20 S U benign N -Contig261_chrX_94412915_94414298 488 chrX 94413396 ENSCAFT00000029188 ENSCAFP00000027142 D 329 E U unknown N -Contig58_chr4_70221679_70223505 1749 chr4 70223432 ENSCAFT00000029501 ENSCAFP00000027423 T 324 S 403721 benign cfa04060=Cytokine-cytokine receptor interaction.cfa04080=Neuroactive ligand-receptor interaction.cfa04630=Jak-STAT signaling pathway -Contig21_chr7_77985141_77986170 827 chr7 77985962 ENSCAFT00000029651 ENSCAFP00000027557 A 855 S 490545 benign N -Contig93_chrX_104176429_104177974 811 chrX 104177246 ENSCAFT00000029709 ENSCAFP00000027610 T 719 M 492128 benign cfa03008=Ribosome biogenesis in eukaryotes -Contig175_chr9_46116277_46118268 1090 chr9 46117366 ENSCAFT00000029722 ENSCAFP00000027622 Q 693 H U benign N -Contig134_chr4_76495667_76496825 860 chr4 76496507 ENSCAFT00000029827 ENSCAFP00000027720 I 113 V 612589 benign cfa00250=Alanine, aspartate and glutamate metabolism.cfa00260=Glycine, serine and threonine metabolism.cfa01100=Metabolic pathways -Contig247_chr6_31967574_31967796 158 chr6 31967732 ENSCAFT00000029875 ENSCAFP00000027765 P 750 T 489999 benign N -Contig6_chr7_81650872_81657348 3786 chr7 81654636 ENSCAFT00000030050 ENSCAFP00000027927 S 501 C 480218 benign N -Contig122_chr5_57147596_57148457 360 chr5 57147964 ENSCAFT00000030140 ENSCAFP00000028007 T 713 I 479558 benign N -Contig83_chr20_58039274_58039724 380 chr20 58039649 ENSCAFT00000030192 ENSCAFP00000028056 E 142 K 611866 benign N -Contig42_chr5_58023274_58024296 585 chr5 58023845 ENSCAFT00000030282 ENSCAFP00000028135 V 415 A 489580 benign N -Contig248_chr20_58217741_58219717 751 chr20 58218495 ENSCAFT00000030285 ENSCAFP00000028138 G 278 S 485038 unknown N -Contig127_chr6_39501489_39501966 83 chr6 39501576 ENSCAFT00000030381 
ENSCAFP00000028228 N 155 S 490020 benign N -Contig123_chr6_39499974_39501056 816 chr6 39500798 ENSCAFT00000030381 ENSCAFP00000028228 A 195 P 490020 benign N -Contig247_chr6_39576694_39577607 493 chr6 39577171 ENSCAFT00000030386 ENSCAFP00000028233 S 745 N 490021 benign N -Contig6_chr9_50725202_50725646 143 chr9 50725344 ENSCAFT00000030726 ENSCAFP00000028560 M 12 T 491218 benign N -Contig221_chr6_41879771_41881379 766 chr6 41880519 ENSCAFT00000030883 ENSCAFP00000028717 A 184 T 606755 benign N -Contig231_chr5_60474911_60475630 279 chr5 60475186 ENSCAFT00000030960 ENSCAFP00000028794 C 505 Y 489618 possibly damaging N -Contig99_chr5_63306202_63308496 2063 chr5 63308224 ENSCAFT00000031146 ENSCAFP00000028978 A 421 V U unknown N -Contig245_chr5_66149146_66149848 349 chr5 66149499 ENSCAFT00000031407 ENSCAFP00000029234 R 207 Q 479601 benign cfa00760=Nicotinate and nicotinamide metabolism.cfa01100=Metabolic pathways -Contig305_chr5_67253589_67254394 375 chr5 67253954 ENSCAFT00000031570 ENSCAFP00000029391 R 203 Q U possibly damaging N -Contig94_chr9_56873843_56875505 1578 chr9 56875408 ENSCAFT00000031743 ENSCAFP00000029555 P 2937 S U benign N -Contig107_chr5_71317862_71318113 71 chr5 71317944 ENSCAFT00000031781 ENSCAFP00000029590 M 281 V U benign N -Contig134_chr9_57426140_57427208 236 chr9 57426380 ENSCAFT00000031798 ENSCAFP00000029606 V 89 I 480698 benign cfa00590=Arachidonic acid metabolism.cfa01100=Metabolic pathways -Contig60_chr12_5631507_5632392 818 chr12 5632313 ENSCAFT00000031814 ENSCAFP00000029621 Y 1697 C 481734 unknown cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04974=Protein digestion and absorption.cfa05146=Amoebiasis -Contig132_chr5_73710776_73711271 149 chr5 73710927 ENSCAFT00000031848 ENSCAFP00000029653 T 1323 M 489696 probably damaging N -Contig39_chr9_59278364_59279024 398 chr9 59278757 ENSCAFT00000032068 ENSCAFP00000029863 A 957 T 480718 benign N -Contig177_chr9_61212763_61213621 700 chr9 61213430 ENSCAFT00000032171 ENSCAFP00000029958 D 79 
N U benign N -Contig67_chr6_62507717_62510152 1055 chr6 62508787 ENSCAFT00000032186 ENSCAFP00000029972 I 212 M 479959 probably damaging cfa00380=Tryptophan metabolism.cfa00450=Selenocompound metabolism.cfa01100=Metabolic pathways -Contig66_chr6_64570039_64570630 325 chr6 64570365 ENSCAFT00000032239 ENSCAFP00000030024 A 862 G 479964 benign cfa04740=Olfactory transduction.cfa04972=Pancreatic secretion -Contig22_chr6_64809414_64810661 626 chr6 64810027 ENSCAFT00000032269 ENSCAFP00000030052 Q 559 K 490179 benign N -Contig50_chr5_85052459_85052865 55 chr5 85052515 ENSCAFT00000032431 ENSCAFP00000030201 S 32 G 479688 benign N -Contig25_chr5_85095840_85098495 1627 chr5 85097474 ENSCAFT00000032433 ENSCAFP00000030203 F 681 S U benign N -Contig25_chr5_85480673_85480982 186 chr5 85480860 ENSCAFT00000032493 ENSCAFP00000030260 A 180 T 610026 possibly damaging cfa05010=Alzheimer's disease -Contig19_chr5_24601128_24602241 685 chr5 24601813 ENSCAFT00000035141 ENSCAFP00000030364 T 695 S U benign N -Contig59_chr26_11519273_11520242 659 chr26 11519937 ENSCAFT00000035276 ENSCAFP00000030520 P 160 L 403557 probably damaging cfa03015=mRNA surveillance pathway.cfa04114=Oocyte meiosis.cfa04270=Vascular smooth muscle contraction.cfa04510=Focal adhesion.cfa04720=Long-term potentiation.cfa04728=Dopaminergic synapse.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa05168=Herpes simplex infection -Contig27_chr12_23130802_23131771 353 chr12 23131154 ENSCAFT00000035307 ENSCAFP00000030552 V 565 M 474935 probably damaging N -Contig31_chr1_8052327_8053606 234 chr1 8052570 ENSCAFT00000035442 ENSCAFP00000030703 C 153 S U possibly damaging N -Contig59_chr20_40539078_40540678 1223 chr20 40540302 ENSCAFT00000035532 ENSCAFP00000030804 H 285 R 403502 benign cfa04620=Toll-like receptor signaling pathway.cfa05142=Chagas disease (American trypanosomiasis).cfa05143=African trypanosomiasis.cfa05144=Malaria.cfa05152=Tuberculosis.cfa05162=Measles.cfa05168=Herpes simplex infection 
-Contig152_chr6_25356961_25358151 701 chr6 25357665 ENSCAFT00000035750 ENSCAFP00000031044 P 479 S 608555 benign cfa04142=Lysosome -Contig18_chr9_58576258_58576773 215 chr9 58576474 ENSCAFT00000035914 ENSCAFP00000031224 K 118 E 480706 benign N -Contig8_chr15_38734005_38734403 242 chr15 38734244 ENSCAFT00000035916 ENSCAFP00000031226 A 237 V 611996 possibly damaging N -Contig76_chr3_30625909_30626247 159 chr3 30626069 ENSCAFT00000036198 ENSCAFP00000031549 T 135 S 479171 benign cfa00260=Glycine, serine and threonine metabolism.cfa00270=Cysteine and methionine metabolism.cfa01100=Metabolic pathways -Contig86_chr37_14528768_14530343 873 chr37 14529628 ENSCAFT00000036570 ENSCAFP00000031969 V 738 D 478875.609202 possibly damaging cfa04060=Cytokine-cytokine receptor interaction.cfa04350=TGF-beta signaling pathway -Contig9_chr5_54124181_54125739 1134 chr5 54125291 ENSCAFT00000036640 ENSCAFP00000032043 A 187 T 610286 benign N -Contig107_chr9_8990420_8991676 1178 chr9 8991591 ENSCAFT00000036774 ENSCAFP00000032186 T 55 M 483288 benign N -Contig47_chr12_20319418_20320775 1212 chr12 20320622 ENSCAFT00000036825 ENSCAFP00000032241 K 606 T 474930 benign cfa00280=Valine, leucine and isoleucine degradation.cfa00630=Glyoxylate and dicarboxylate metabolism.cfa00640=Propanoate metabolism.cfa01100=Metabolic pathways -Contig4_chr2_45195542_45196115 233 chr2 45195785 ENSCAFT00000037022 ENSCAFP00000032463 D 833 N 478055 possibly damaging N -Contig8_chr8_77227029_77227651 339 chr8 77227366 ENSCAFT00000037096 ENSCAFP00000032544 T 61 A 490895.612602 benign cfa04020=Calcium signaling pathway.cfa04145=Phagosome.cfa04640=Hematopoietic cell lineage.cfa04650=Natural killer cell mediated cytotoxicity.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04666=Fc gamma R-mediated phagocytosis.cfa04672=Intestinal immune network for IgA production.cfa05140=Leishmaniasis.cfa05143=African trypanosomiasis.cfa05146=Amoebiasis.cfa05150=Staphylococcus aureus 
infection.cfa05152=Tuberculosis.cfa05162=Measles.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05340=Primary immunodeficiency.cfa05414=Dilated cardiomyopathy.cfa05416=Viral myocarditis -Contig2_chr7_60049092_60051693 266 chr7 60049361 ENSCAFT00000038176 ENSCAFP00000033857 T 195 M U probably damaging N -Contig31_chr30_24179816_24187402 4867 chr30 24184686 ENSCAFT00000038211 ENSCAFP00000033897 G 103 S U benign N -Contig9_chr27_48250956_48251793 192 chr27 48251161 ENSCAFT00000038256 ENSCAFP00000033944 T 166 M 477739 probably damaging N -Contig45_chr27_43537046_43537944 568 chr27 43537599 ENSCAFT00000038301 ENSCAFP00000033996 M 69 I 611773 benign cfa04010=MAPK signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa05200=Pathways in cancer.cfa05218=Melanoma -Contig133_chr18_28371600_28372547 83 chr18 28371695 ENSCAFT00000038383 ENSCAFP00000034090 L 102 Q 475933 probably damaging N -Contig11_chr28_8532951_8533892 511 chr28 8533462 ENSCAFT00000038937 ENSCAFP00000034728 R 19 C 477763 probably damaging cfa03008=Ribosome biogenesis in eukaryotes.cfa03013=RNA transport -Contig1_chr14_5733966_5735336 783 chr14 5734754 ENSCAFT00000039094 ENSCAFP00000034905 A 166 T U benign N -Contig48_chr27_6001075_6001818 392 chr27 6001478 ENSCAFT00000039109 ENSCAFP00000034919 R 103 H U probably damaging N -Contig40_chr11_43589173_43590288 973 chr11 43590138 ENSCAFT00000039148 ENSCAFP00000034962 R 1617 P 481557 benign N -Contig1_chr14_30424688_30425258 179 chr14 30424861 ENSCAFT00000039390 ENSCAFP00000035239 T 648 I 475245 benign cfa04666=Fc gamma R-mediated phagocytosis.cfa04810=Regulation of actin cytoskeleton -Contig58_chr8_7461111_7462065 323 chr8 7461423 ENSCAFT00000039451 ENSCAFP00000035309 L 112 F U benign N -Contig1_chr25_43094809_43095852 908 chr25 43095708 ENSCAFT00000039609 ENSCAFP00000035483 W 18 G U unknown N -Contig114_chr25_43076436_43076800 141 chr25 43076581 
ENSCAFT00000039609 ENSCAFP00000035483 S 45 C U unknown N
--- a/test-data/test_in/sample.gd_snp Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,402 +0,0 @@ -#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", -#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} -Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 -Contig48_chr1_10150253_10151311 11 A G 94.3 chr1 10150264 A 1 0 2 30 1 0 2 30 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 Y 22 +99. 0 -Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 -Contig86_chr1_30984450_30985684 670 C T 365.0 chr1 30985133 C 9 0 2 54 10 0 2 57 13 0 2 66 3 0 2 36 9 0 2 54 7 0 2 48 Y 145 0.031 0 -Contig5_chr1_32562160_32563940 1215 G T 163.0 chr1 32563356 G 17 0 2 78 19 0 2 84 20 0 2 87 14 0 2 69 12 0 2 63 10 0 2 57 Y 17 0.251 0 -Contig110_chr1_33385093_33386888 510 C T 270.0 chr1 33385587 A 14 0 2 69 11 0 2 60 19 0 2 84 11 0 2 60 10 0 2 57 13 0 2 66 Y 13 0.126 0 -Contig100_chr1_33562920_33564288 743 C T 178.0 chr1 33563655 C 6 0 2 45 10 0 2 57 8 0 2 51 5 0 2 42 13 0 2 66 7 0 2 48 Y 13 0.090 3 -Contig7_chr1_37302355_37302489 97 A G 59.2 chr1 37302452 G 3 0 2 36 8 0 2 51 5 0 2 42 8 0 2 51 7 0 2 48 6 0 2 45 N 56 2.812 0 -Contig62_chr1_41880715_41882180 1078 T G 57.6 chr1 41881785 T 14 0 2 69 15 0 2 72 16 0 2 75 13 0 2 66 8 0 2 51 10 0 2 57 Y 21 0.477 0 -Contig47_chr1_48409178_48409384 37 C T 134.0 chr1 48409215 T 5 0 2 42 6 0 2 45 8 0 2 51 9 0 2 54 4 0 2 39 6 0 2 45 N 66 +99. 
0 -Contig119_chr1_49647683_49650077 1618 C A 99.7 chr1 49649276 A 8 0 2 51 11 0 2 60 10 0 2 57 9 0 2 54 10 0 2 57 14 0 2 69 Y 16 0.166 0 -Contig21_chr1_60697952_60699446 307 G A 51.9 chr1 60698265 G 12 0 2 63 9 0 2 54 4 0 2 39 6 0 2 45 9 0 2 54 4 0 2 39 Y 98 0.507 0 -Contig131_chr1_62319542_62320564 169 C G 103.0 chr1 62319709 C 12 0 2 63 12 0 2 66 14 0 2 69 12 0 2 63 9 0 2 54 9 0 2 54 Y 73 0.307 1 -Contig14_chr1_63450425_63450680 101 T A 102.0 chr1 63450530 T 8 0 2 51 10 0 2 57 18 0 2 81 8 0 2 51 8 0 2 34 8 0 2 51 N 99 1.085 0 -Contig83_chr1_63869778_63869942 40 T C 23.7 chr1 63869819 C 5 0 2 42 7 0 2 48 2 0 2 33 4 0 2 39 6 0 2 48 4 0 2 39 N 654 1.364 0 -Contig30_chr1_64702572_64703138 178 A T 117.0 chr1 64702750 T 10 0 2 57 10 0 2 57 20 0 2 87 21 0 2 90 6 0 2 45 12 0 2 63 Y 50 3.872 0 -Contig101_chr1_69868406_69868872 287 G A 14.6 chr1 69868689 G 13 0 2 66 17 0 2 78 10 0 2 57 8 0 2 51 7 0 2 48 8 0 2 51 N 137 0.305 0 -Contig35_chr1_74482577_74482791 170 G A 45.4 chr1 74482751 A 3 0 2 36 4 0 2 39 13 0 2 66 2 0 2 33 5 0 2 42 2 0 2 33 N 20 +99. 3 -Contig49_chr1_83865731_83865944 85 G A 34.1 chr1 -1 N 4 0 2 39 4 0 2 39 8 0 2 51 2 0 2 33 5 0 2 42 4 0 2 39 N -1 1.485 0 -Contig64_chr1_87343284_87345672 163 T A 3.76 chr1 87343443 C 0 2 2 1 0 0 -1 0 5 0 2 42 2 0 2 33 0 1 2 14 0 0 -1 0 N 3 0.039 2 -Contig20_chr1_110679280_110679687 181 C T 87.4 chr1 110679454 - 1 0 2 30 7 0 2 48 4 0 2 39 2 0 2 33 2 0 2 33 0 0 -1 0 N 31 0.660 2 -Contig129_chr1_117547123_117548666 926 G A 126.0 chr1 117548059 G 19 0 2 84 9 0 2 54 11 0 2 60 10 0 2 57 12 0 2 63 11 0 2 60 Y 64 0.049 0 -Contig7_chr1_125154638_125154844 190 G T 130.0 chr1 125154818 A 5 0 2 42 4 0 2 39 7 0 2 48 2 0 2 33 7 0 2 48 4 0 2 39 N 33 +99. 
0 -Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 -Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 -Contig10_chr2_40859744_40860534 637 G A 888.0 chr2 40860397 A 3 0 2 36 3 0 2 36 2 0 2 33 7 0 2 48 6 0 2 45 8 0 2 51 Y 42 1.435 0 -Contig52_chr2_41421981_41422725 604 C A 888.0 chr2 41422583 A 17 0 2 78 18 0 2 81 14 0 2 69 17 0 2 78 12 0 2 63 14 0 2 69 Y 44 0.882 0 -Contig94_chr2_43869105_43870358 220 G A 888.0 chr2 43869333 G 12 0 2 63 18 0 2 81 11 0 2 60 15 0 2 72 12 0 2 63 13 0 2 66 Y 1 0.156 0 -Contig34_chr2_48444129_48444939 695 C T 134.0 chr2 48444828 C 14 0 2 69 8 0 2 51 16 0 2 75 17 0 2 78 9 0 2 54 15 0 2 72 Y 161 0.375 0 -Contig6_chr2_56859179_56859956 671 T C 999.9 chr2 56859851 T 15 0 2 72 18 0 2 81 20 0 2 90 19 0 2 84 19 0 2 84 24 0 2 99 N 28 5.308 1 -Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 -Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 -Contig92_chr2_75906683_75907774 773 T C 85.4 chr2 75907438 C 12 0 2 63 12 0 2 63 17 0 2 78 8 0 2 51 8 0 2 51 13 0 2 66 Y 93 0.166 0 -Contig163_chr2_76402959_76404830 221 C T 127.0 chr2 76403181 C 4 0 2 42 10 0 2 57 9 0 2 54 11 0 2 60 7 0 2 48 9 0 2 54 Y 54 0.178 1 -Contig177_chr2_79559305_79560033 168 C T 5.67 chr2 79559476 A 2 0 2 33 3 0 2 36 1 0 2 30 2 0 2 33 0 0 -1 0 1 0 2 30 N 56 0.257 0 -Contig8_chr2_82945728_82945839 61 T C 223.0 chr2 -1 N 2 0 2 33 4 0 2 39 9 0 2 54 3 0 2 36 5 0 2 42 0 0 -1 0 N -1 +99. 
1 -Contig59_chr2_85243022_85243758 506 G A 96.3 chr2 85243509 T 9 0 2 54 11 0 2 60 12 0 2 63 14 0 2 69 10 0 2 57 7 0 2 48 Y 6 0.459 0 -Contig56_chr3_17326225_17327548 387 G C 91.2 chr3 17326591 G 14 0 2 69 13 0 2 66 15 0 2 72 15 0 2 72 13 0 2 66 12 0 2 63 Y 20 0.225 3 -Contig108_chr3_46210055_46210874 367 A G 21.0 chr3 46210423 A 19 0 2 84 10 0 2 57 16 0 2 75 14 0 2 69 20 0 2 87 11 0 2 60 N 236 0.028 1 -Contig16_chr3_47113407_47114449 322 G A 105.0 chr3 47113713 G 13 0 2 66 17 0 2 78 15 0 2 72 6 0 2 45 11 0 2 60 11 0 2 60 Y 114 0.132 5 -Contig3_chr3_47564810_47565251 262 T G 112.0 chr3 47565104 T 14 0 2 69 16 0 2 75 20 0 2 87 10 0 2 57 9 0 2 54 8 0 2 51 Y 24 0.073 1 -Contig35_chr3_49662401_49662929 270 A T 96.1 chr3 49662652 A 14 0 2 69 11 0 2 60 23 0 2 96 13 0 2 66 12 0 2 63 11 0 2 60 Y 36 3.583 2 -Contig97_chr3_49820354_49821631 1069 G A 44.1 chr3 49821402 G 9 0 2 54 9 0 2 54 6 0 2 45 10 0 2 57 5 0 2 42 8 0 2 51 N 6 0.201 2 -Contig1_chr3_51588422_51589409 926 A G 51.0 chr3 51589353 G 2 0 2 33 2 0 2 33 6 0 2 45 4 0 2 39 9 0 2 54 11 0 2 60 N 21 1.147 0 -Contig25_chr3_53260697_53262560 402 G A 211.0 chr3 53261095 G 17 0 2 78 14 0 2 69 15 0 2 75 12 0 2 63 14 0 2 69 12 0 2 63 Y 116 1.033 0 -Contig11_chr3_53992739_53995954 2392 G A 82.4 chr3 53995143 A 12 0 2 66 11 0 2 60 14 0 2 69 6 0 2 45 11 0 2 60 17 0 2 78 Y 358 0.321 1 -Contig236_chr3_72676275_72676473 128 G A 278.0 chr3 72676410 G 12 0 2 63 11 0 2 60 13 0 2 66 10 0 2 57 11 0 2 60 8 0 2 51 N 36 0.496 1 -Contig48_chr3_74792236_74792388 63 T C 111.0 chr3 74792289 - 17 0 2 78 9 0 2 54 9 0 2 54 5 0 2 42 11 0 2 60 9 0 2 54 N -1 3.528 0 -Contig65_chr3_80727952_80728283 39 T C 71.2 chr3 80727990 T 7 0 2 48 3 0 2 36 8 0 2 51 6 0 2 45 8 0 2 51 11 0 2 60 N 22 7.078 0 -Contig53_chr3_86407941_86409349 1406 G A 86.9 chr3 86409317 A 5 0 2 42 5 0 2 42 4 0 2 39 10 0 2 57 8 0 2 51 12 0 2 63 N 14 3.285 1 -Contig13_chr3_92409738_92412300 718 A G 23.3 chr3 92410450 A 12 0 2 63 16 0 2 75 18 0 2 81 13 0 2 66 22 0 2 93 7 0 2 48 Y 23 
0.224 2 -Contig134_chr4_12145648_12148225 1326 C T 164.0 chr4 12146961 C 9 0 2 54 8 0 2 51 7 0 2 48 3 0 2 36 5 0 2 42 5 0 2 42 Y 4 0.080 1 -Contig88_chr4_15557471_15557833 268 A G 145.0 chr4 15557737 A 6 0 2 45 6 0 2 45 11 0 2 60 9 0 2 54 5 0 2 42 6 0 2 45 Y 46 4.138 0 -Contig53_chr4_18823968_18824478 149 A G 91.3 chr4 18824115 A 18 0 2 81 15 0 2 72 21 0 2 90 13 0 2 66 9 0 2 54 12 0 2 63 N 51 0.251 0 -Contig86_chr4_24953866_24956222 1985 C T 76.4 chr4 24955841 T 8 0 2 51 1 0 2 30 3 0 2 36 7 0 2 48 2 0 2 33 6 0 2 45 Y 12 0.357 0 -Contig19_chr4_26233601_26233991 146 G C 51.6 chr4 26233744 G 10 0 2 57 8 0 2 51 9 0 2 54 5 0 2 42 9 0 2 54 4 0 2 39 N 41 0.163 3 -Contig78_chr4_28579975_28580134 30 T G 19.6 chr4 28579994 - 4 0 2 39 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 2 0 2 33 N 33 0.499 0 -Contig16_chr4_30177226_30179725 621 C T 88.4 chr4 30177859 C 20 0 2 87 13 0 2 66 13 0 2 66 11 0 2 60 8 0 2 51 8 0 2 51 Y 45 0.797 1 -Contig30_chr4_46196500_46197672 1045 A C 33.4 chr4 46197522 C 16 0 2 75 9 0 2 54 4 0 2 39 7 0 2 48 14 0 2 69 6 0 2 45 Y 43 0.306 0 -Contig2_chr4_47039007_47039323 158 G C 35.1 chr4 47039160 - 8 0 2 51 9 0 2 54 13 0 2 66 8 0 2 51 10 0 2 60 9 0 2 54 N 0 0.131 0 -Contig17_chr4_61310346_61311158 267 C T 49.9 chr4 61310604 T 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 14 0 2 69 7 0 2 48 Y 219 0.098 0 -Contig26_chr4_64190783_64191295 64 A G 162.0 chr4 64190843 A 10 0 2 57 6 0 2 45 20 0 2 87 12 0 2 63 17 0 2 78 7 0 2 48 Y 306 7.428 0 -Contig11_chr4_65500960_65501654 634 T C 107.0 chr4 65501585 T 13 0 2 66 14 0 2 69 13 0 2 66 13 0 2 66 6 0 2 45 18 0 2 81 Y 10 6.849 0 -Contig38_chr4_67768488_67768982 113 A G 102.0 chr4 67768598 A 9 0 2 54 8 0 2 51 9 0 2 54 11 0 2 60 10 0 2 57 7 0 2 48 Y 188 3.175 0 -Contig30_chr4_70978564_70979580 596 A G 164.0 chr4 70979151 A 15 0 2 72 12 0 2 63 20 0 2 87 14 0 2 69 15 0 2 72 15 0 2 72 Y 111 2.458 2 -Contig72_chr4_74225793_74226492 674 A G 110.0 chr4 74226472 A 5 0 2 42 3 0 2 36 2 0 2 33 3 0 2 36 7 0 2 48 4 0 2 39 Y 115 +99. 
1 -Contig32_chr4_75618955_75620254 301 T C 333.0 chr4 75619257 C 10 0 2 57 8 0 2 51 12 0 2 63 20 0 2 87 12 0 2 63 14 0 2 69 Y 34 0.163 2 -Contig31_chr5_4734956_4736547 1166 C T 133.0 chr5 4736132 C 14 0 2 69 8 0 2 51 17 0 2 78 4 0 2 39 9 0 2 54 12 0 2 63 Y 1 0.021 0 -Contig113_chr5_11052263_11052603 28 C T 38.2 chr5 11052280 C 1 2 1 12 3 2 1 10 5 0 2 42 2 1 2 13 3 0 2 36 8 0 2 51 Y 161 +99. 0 -Contig30_chr5_15698241_15699076 396 G T 76.6 chr5 15698633 T 8 0 2 51 9 0 2 54 10 0 2 57 7 0 2 48 11 0 2 60 8 0 2 54 Y 65 0.009 0 -Contig36_chr5_17709244_17710004 373 T C 281.0 chr5 17709624 T 6 0 2 45 9 0 2 54 7 0 2 48 4 0 2 39 10 0 2 57 4 0 2 39 Y 16 0.131 0 -Contig13_chr5_21881138_21881562 227 A G 251.0 chr5 21881356 A 11 0 2 60 20 0 2 87 22 0 2 93 10 0 2 57 10 0 2 57 21 0 2 90 Y 182 2.013 0 -Contig5_chr5_23188121_23190168 1841 C T 141.0 chr5 23189975 C 20 0 2 87 19 0 2 84 22 0 2 93 16 0 2 75 18 0 2 81 14 0 2 69 N 45 0.355 0 -Contig6_chr5_26899813_26900498 97 A C 88.6 chr5 26899910 A 15 0 2 72 14 0 2 69 27 0 2 108 15 0 2 72 13 0 2 69 12 0 2 63 Y 92 7.370 3 -Contig314_chr5_34019166_34019319 72 C A 20.1 chr5 -1 N 6 0 2 45 9 0 2 54 4 0 2 39 4 0 2 39 9 0 2 54 5 0 2 42 N -1 +99. 4 -Contig147_chr5_38980258_38980559 221 C T 40.8 chr5 38980477 C 15 0 2 72 15 0 2 72 19 0 2 84 10 0 2 57 12 0 2 63 20 0 2 87 Y 11 4.576 0 -Contig115_chr5_48119079_48120169 151 C T 78.3 chr5 48119234 C 17 0 2 78 10 0 2 57 14 0 2 69 16 0 2 75 8 0 2 51 12 0 2 63 Y 205 0.320 0 -Contig45_chr5_50892738_50892968 169 C A 25.8 chr5 50892911 C 10 0 2 57 7 0 2 48 10 0 2 60 6 0 2 45 6 0 2 45 13 0 2 66 N 244 0.497 1 -Contig40_chr5_51484164_51484696 14 A G 53.3 chr5 51484180 A 6 0 2 45 4 0 2 39 4 0 2 39 3 0 2 36 0 0 2 13 3 0 2 36 N 63 +99. 
1 -Contig40_chr5_51664286_51667573 861 C T 148.0 chr5 51665149 C 20 0 2 87 21 0 2 90 20 0 2 87 11 0 2 60 16 0 2 75 15 0 2 72 Y 207 0.080 1 -Contig15_chr5_51889708_51891244 882 A G 149.0 chr5 51890581 G 13 0 2 66 18 0 2 81 17 0 2 78 22 0 2 93 15 0 2 72 22 0 2 93 Y 7 0.025 1 -Contig143_chr5_57231364_57232010 294 T C 78.5 chr5 57231644 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 10 0 2 57 6 0 2 45 Y 73 0.337 2 -Contig13_chr5_57609985_57610584 496 C T 50.5 chr5 57610476 C 17 0 2 78 9 0 2 54 6 0 2 45 8 0 2 51 10 0 2 57 12 0 2 63 N 77 2.022 1 -Contig230_chr5_58486998_58487280 227 T C 192.0 chr5 58487232 T 3 0 2 36 4 0 2 39 9 0 2 54 6 0 2 45 4 0 2 39 7 0 2 48 N 24 0.100 2 -Contig385_chr5_60122961_60123128 15 C G 136.0 chr5 60122976 C 0 0 -1 0 0 0 -1 0 1 0 2 30 1 0 2 30 3 0 2 36 0 0 -1 0 N 100 +99. 2 -Contig143_chr5_65121393_65122035 558 C A 127.0 chr5 65121959 A 0 0 -1 0 5 0 2 42 3 0 2 36 4 0 2 39 0 0 -1 0 4 0 2 39 Y 285 0.391 1 -Contig32_chr5_70852360_70853289 282 G A 114.0 chr5 70852623 G 16 0 2 75 11 0 2 60 13 0 2 66 12 0 2 63 13 0 2 66 7 0 2 48 Y 33 0.276 0 -Contig215_chr5_70946445_70947428 363 T G 28.2 chr5 70946809 C 4 0 2 39 0 5 0 12 9 0 2 54 6 0 2 45 3 3 2 1 9 0 2 54 N 43 0.153 0 -Contig100_chr5_71189678_71190590 813 C T 30.8 chr5 71190523 C 11 0 2 60 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 Y 8 0.362 1 -Contig45_chr5_76133561_76134403 388 A G 103.0 chr5 76133941 G 3 0 2 36 8 0 2 51 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 57 0.038 0 -Contig61_chr5_90202541_90204393 909 C T 101.0 chr5 90203461 T 7 0 2 48 5 0 2 42 14 0 2 69 3 0 2 36 5 0 2 42 8 0 2 51 Y 64 1.448 0 -Contig111_chr6_5821219_5822519 1060 A G 68.1 chr6 5822321 T 7 0 2 48 6 0 2 45 11 0 2 60 9 0 2 54 3 0 2 36 12 0 2 63 Y 7 0.231 1 -Contig220_chr6_10671338_10672441 999 T C 36.3 chr6 10672322 T 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 5 0 2 42 9 0 2 54 Y 1 1.667 0 -Contig226_chr6_17361986_17362884 418 G C 251.0 chr6 17362406 G 6 0 2 45 8 0 2 51 7 0 2 48 9 0 2 54 7 0 2 48 7 0 2 48 Y 7 0.147 0 
-Contig380_chr6_18173971_18174169 180 C T 4.87 chr6 18174144 T 0 0 -1 0 4 0 2 39 7 0 2 48 2 0 2 33 2 0 2 33 1 0 2 30 N 56 2.589 0 -Contig51_chr6_20231207_20231785 161 A G 70.5 chr6 20231375 G 13 0 2 66 5 0 2 42 8 0 2 51 2 0 2 36 5 0 2 42 5 0 2 42 Y 153 1.754 0 -Contig102_chr6_30271329_30271577 39 T G 139.0 chr6 30271371 G 3 0 2 36 4 0 2 39 6 0 2 45 1 0 2 30 4 0 2 39 4 0 2 39 N 15 1.159 0 -Contig217_chr6_31393824_31394218 97 G A 115.0 chr6 31393921 G 9 0 2 54 19 0 2 84 15 0 2 72 12 0 2 63 7 0 2 48 10 0 2 57 N 45 0.477 0 -Contig186_chr6_31928098_31928245 73 G A 117.0 chr6 -1 N 5 0 2 42 8 0 2 51 2 0 2 33 4 0 2 39 1 0 2 30 5 0 2 42 N -1 0.276 1 -Contig52_chr6_33188498_33188724 123 G A 59.0 chr6 -1 N 5 0 2 42 13 0 2 66 8 0 2 51 4 0 2 39 9 0 2 54 9 0 2 54 N -1 0.880 1 -Contig102_chr6_38743009_38743435 290 A G 178.0 chr6 38743311 A 11 0 2 60 13 0 2 66 9 0 2 54 11 0 2 60 12 0 2 63 13 0 2 66 Y 34 0.148 4 -Contig81_chr6_49018353_49019532 179 C A 72.5 chr6 49018530 A 15 0 2 72 13 0 2 66 19 0 2 72 8 0 2 51 12 0 2 63 16 0 2 75 Y 15 0.145 1 -Contig112_chr6_51024554_51024851 100 A G 121.0 chr6 51024654 A 10 0 2 57 12 0 2 63 9 0 2 54 13 0 2 66 14 0 2 69 17 0 2 78 N 75 4.287 0 -Contig40_chr6_51412751_51413807 227 T C 94.5 chr6 51412975 C 5 0 2 42 8 0 2 51 7 0 2 48 9 0 2 54 11 0 2 60 10 0 2 57 Y 4 5.661 0 -Contig47_chr6_69073222_69074767 1315 T C 212.0 chr6 69074558 T 20 0 2 87 17 0 2 78 18 0 2 81 12 0 2 63 17 0 2 78 7 0 2 48 Y 9 0.652 0 -Contig30_chr6_74848932_74849059 57 C G 46.3 chr6 74848993 C 7 0 2 48 7 0 2 33 6 0 2 45 7 0 2 48 5 0 2 42 6 0 2 45 N -1 +99. 1 -Contig84_chr7_6648683_6650255 1297 G A 110.0 chr7 6649988 G 18 0 2 81 9 0 2 54 22 0 2 77 16 0 2 75 20 0 2 87 6 0 2 45 Y 83 0.166 0 -Contig239_chr7_13007379_13007700 275 A G 39.8 chr7 13007642 A 8 0 2 51 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 5 0 2 42 N 46 1.511 3 -Contig119_chr7_18310707_18310948 23 A T 133.0 chr7 18310729 A 6 0 2 45 5 0 2 42 10 0 2 57 5 0 2 42 2 0 2 33 2 0 2 33 N 4553 +99. 
0 -Contig93_chr7_18513377_18513741 173 T C 130.0 chr7 18513533 C 15 0 2 72 11 0 2 60 18 0 2 81 6 0 2 45 10 0 2 57 14 0 2 69 Y 115 0.174 0 -Contig133_chr7_19603333_19603776 414 C G 31.9 chr7 19603734 G 10 0 2 57 4 0 2 39 4 0 2 39 5 0 2 42 9 0 2 54 9 0 2 54 N 78 +99. 5 -Contig132_chr7_20426224_20428145 1815 A G 28.3 chr7 20428041 A 11 1 2 43 12 0 2 63 19 0 2 84 23 0 2 96 14 0 2 69 10 0 2 57 N 11 0.264 0 -Contig206_chr7_26281823_26282074 103 C A 101.0 chr7 26281925 T 11 0 2 60 16 0 2 61 19 0 2 84 6 0 2 45 19 0 2 84 16 0 2 75 N -1 0.947 1 -Contig116_chr7_45858984_45859111 38 T C 73.2 chr7 -1 N 2 0 2 33 1 0 2 30 3 0 2 36 2 0 2 33 2 0 2 33 1 0 2 30 N -1 3.442 0 -Contig38_chr7_50681997_50682600 42 T C 92.4 chr7 50682037 G 6 0 2 45 2 0 2 33 10 0 2 57 12 0 2 63 5 0 2 42 6 0 2 45 Y 94 0.146 0 -Contig55_chr7_53147505_53148974 894 A G 68.4 chr7 53148397 G 22 0 2 93 13 0 2 66 16 0 2 75 8 0 2 51 16 0 2 75 11 0 2 60 Y 19 0.060 0 -Contig4_chr7_53685534_53688206 1709 C G 76.2 chr7 53687225 C 18 0 2 81 17 0 2 78 18 0 2 81 15 0 2 72 14 0 2 69 14 0 2 69 Y 32 0.659 1 -Contig61_chr7_55832923_55834065 506 T C 185.0 chr7 55833450 C 9 0 2 54 10 0 2 57 22 0 2 93 12 0 2 63 12 0 2 63 7 0 2 48 Y 1 0.019 0 -Contig91_chr8_12804505_12805470 409 C A 111.0 chr8 12804906 C 8 0 2 51 10 0 2 57 15 0 2 72 12 0 2 63 14 0 2 69 15 0 2 72 N 145 0.175 0 -Contig30_chr8_17147743_17147923 13 G A 105.0 chr8 17147756 A 1 3 1 19 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 3 0 2 36 N 6 +99. 
0 -Contig8_chr8_27811135_27812620 333 C T 37.9 chr8 27811458 C 4 0 2 39 11 0 2 60 18 0 2 81 5 0 2 42 6 0 2 45 5 0 2 42 Y 1 0.272 0 -Contig66_chr8_28273102_28273660 175 G C 81.6 chr8 28273263 T 9 0 2 54 17 0 2 78 19 0 2 84 8 0 2 51 16 0 2 75 19 0 2 84 Y 3 2.735 0 -Contig84_chr8_31375511_31376456 443 T C 125.0 chr8 31375954 T 10 0 2 57 15 0 2 72 27 0 2 108 18 0 2 81 16 0 2 75 9 0 2 54 Y 2 0.650 0 -Contig18_chr8_32575859_32577431 264 T C 151.0 chr8 32576124 T 20 0 2 87 14 0 2 69 17 0 2 78 14 0 2 69 13 0 2 66 14 0 2 69 Y 17 0.915 1 -Contig54_chr8_40913908_40916451 1275 G A 175.0 chr8 40915190 G 10 0 2 57 8 0 2 51 11 0 2 60 7 0 2 48 8 0 2 51 9 0 2 54 Y 21 0.056 3 -Contig93_chr8_44658786_44659075 180 T G 55.3 chr8 44658964 T 4 0 2 39 3 0 2 36 6 0 2 45 5 0 2 45 5 0 2 42 4 0 2 39 N 14 0.188 0 -Contig17_chr8_57490059_57490498 69 G T 97.4 chr8 57490127 A 2 0 2 33 11 0 2 60 15 0 2 72 16 0 2 75 8 0 2 51 10 0 2 57 N 40 0.522 5 -Contig66_chr8_58562376_58563446 345 C G 5.74 chr8 58562721 C 14 0 2 69 12 0 2 63 9 0 2 57 10 0 2 57 9 0 2 54 10 0 2 57 Y 6 0.685 0 -Contig44_chr8_71186368_71188207 1455 G T 147.0 chr8 71187818 G 4 10 1 74 3 0 2 36 20 0 2 87 12 0 2 63 8 0 2 51 10 0 2 57 Y 88 0.036 0 -Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 0.448 4 -Contig96_chr9_39008495_39009278 215 A C 98.7 chr9 39008708 C 7 0 2 48 13 0 2 66 28 0 2 111 16 0 2 75 17 0 2 78 17 0 2 78 Y 8 0.427 1 -Contig22_chr10_15505382_15505589 172 T C 38.5 chr10 15505548 T 2 0 2 33 6 0 2 45 8 0 2 51 8 0 2 51 9 0 2 54 12 0 2 63 N 284 2.861 0 -Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 -Contig63_chr10_42716594_42719945 1018 A G 88.7 chr10 42717616 G 13 0 2 66 14 0 2 69 13 0 2 66 12 0 2 63 18 0 2 81 5 0 2 42 Y 25 1.740 0 -Contig22_chr10_43255307_43255570 81 C A 37.2 chr10 43255383 C 15 0 2 72 18 0 2 81 22 0 2 93 16 0 2 75 11 0 2 60 12 0 2 63 N 62 0.450 0 
-Contig9_chr10_51475063_51476054 770 C T 57.3 chr10 51475839 C 6 0 2 45 16 0 2 75 16 0 2 75 13 0 2 66 9 0 2 54 9 2 2 21 N 80 0.394 0 -Contig42_chr10_53816543_53818392 1642 G A 27.5 chr10 53818172 A 7 0 2 48 13 0 2 66 17 0 2 78 14 0 2 69 19 0 2 84 16 0 2 75 N 1 0.433 0 -Contig36_chr10_53992615_53993741 229 G C 86.2 chr10 53992846 G 17 0 2 78 14 0 2 69 13 0 2 66 15 0 2 72 12 0 2 63 15 0 2 72 N 23 1.912 0 -Contig20_chr10_58141129_58141750 575 C T 46.1 chr10 58141701 C 7 0 2 48 8 0 2 51 9 0 2 54 3 0 2 36 4 0 2 39 9 0 2 54 N 1 4.264 0 -Contig26_chr10_59510973_59511899 146 C A 29.0 chr10 59511126 C 8 0 2 51 13 0 2 66 18 0 2 81 13 0 2 66 10 0 2 57 7 0 2 48 Y 208 1.077 0 -Contig72_chr11_7142765_7143772 146 G A 152.0 chr11 7142911 A 8 0 2 51 8 0 2 51 24 0 2 99 10 0 2 57 17 0 2 78 11 0 2 60 Y 90 1.137 0 -Contig103_chr11_8844784_8845095 214 T G 135.0 chr11 8844993 T 1 1 2 12 10 0 2 57 5 4 1 26 2 3 1 13 2 7 1 34 1 1 2 13 Y 75 0.731 0 -Contig9_chr11_9904571_9905983 1284 C T 151.0 chr11 9905857 C 16 0 2 75 19 0 2 84 17 0 2 78 16 0 2 75 12 0 2 63 13 1 2 44 Y 11 0.422 1 -Contig35_chr11_22459883_22460855 714 T G 54.9 chr11 22460577 T 3 0 2 36 1 0 2 30 3 0 2 36 2 0 2 33 2 0 2 33 0 0 -1 0 N 24 0.382 0 -Contig7_chr11_40017076_40017630 352 C T 46.3 chr11 40017422 C 7 0 2 48 9 0 2 54 6 0 2 45 8 0 2 51 16 0 2 75 9 0 2 54 Y 44 0.336 0 -Contig108_chr11_42953408_42955156 367 A G 89.4 chr11 42953779 A 17 0 2 78 11 0 2 60 14 0 2 69 20 0 2 87 14 0 2 69 17 0 2 78 Y 118 0.784 1 -Contig82_chr11_43490732_43490862 60 C T 47.3 chr11 -1 N 0 0 -1 0 0 0 -1 0 1 0 2 30 3 0 2 36 1 1 2 19 1 0 2 30 N -1 6.763 0 -Contig16_chr11_53408448_53408790 187 A G 153.0 chr11 53408638 A 7 0 2 48 9 0 2 54 18 0 2 81 10 0 2 57 11 0 2 60 12 0 2 63 Y 116 1.367 0 -Contig21_chr12_18403415_18404381 586 G T 34.5 chr12 18403983 - 13 0 2 66 16 0 2 75 25 0 2 102 12 0 2 63 12 0 2 63 14 0 2 69 Y 12 0.068 0 -Contig33_chr12_19804073_19804529 178 T C 69.4 chr12 19804261 T 13 0 2 66 13 0 2 66 22 0 2 93 11 0 2 60 12 0 2 63 18 0 2 81 Y 11 
1.571 0 -Contig41_chr12_25565452_25566993 475 G T 6.29 chr12 25565926 G 15 0 2 72 14 0 2 69 10 0 2 57 15 0 2 72 18 0 2 81 19 0 2 84 N 10 2.231 1 -Contig9_chr12_27204351_27204696 239 A G 145.0 chr12 27204587 A 7 0 2 48 8 0 2 51 12 0 2 63 8 0 2 51 11 0 2 60 11 0 2 60 Y 14 0.046 0 -Contig45_chr12_30548282_30550498 448 C T 124.0 chr12 30548703 - 9 0 2 54 11 0 2 60 22 0 2 93 19 0 2 84 12 0 2 63 12 0 2 63 Y 66 0.305 0 -Contig46_chr12_35571846_35572563 58 G C 83.2 chr12 35571906 G 4 0 2 39 10 0 2 57 11 0 2 60 6 0 2 45 10 0 2 57 6 0 2 45 Y 55 +99. 1 -Contig28_chr12_42075871_42076044 136 G A 134.0 chr12 42076006 A 6 0 2 45 5 0 2 42 7 0 2 48 7 0 2 48 2 0 2 33 4 0 2 39 N 3 9.479 0 -Contig16_chr12_42386141_42387454 194 A G 161.0 chr12 42386323 A 11 0 2 60 8 0 2 54 23 0 2 96 17 0 2 78 6 0 2 45 13 0 2 66 Y 7 0.927 1 -Contig42_chr12_44424628_44425829 255 A G 84.4 chr12 44424879 A 12 0 2 63 19 0 2 84 23 0 2 96 15 0 2 72 18 0 2 81 14 0 2 69 Y 18 1.190 2 -Contig10_chr12_44447953_44449698 63 C T 105.0 chr12 44448020 C 11 0 2 60 9 0 2 54 12 0 2 63 10 0 2 57 15 0 2 72 8 0 2 51 Y 31 11.791 0 -Contig5_chr12_53880670_53882675 1221 A C 99.4 chr12 53881888 A 16 0 2 75 18 0 2 81 23 0 2 96 10 0 2 57 15 0 2 72 17 0 2 78 Y 31 0.061 0 -Contig86_chr12_56715356_56716464 818 T C 166.0 chr12 56716164 T 20 0 2 87 16 0 2 75 16 0 2 75 14 0 2 69 13 0 2 66 7 0 2 48 Y 22 1.092 0 -Contig3_chr12_65021967_65024097 238 T G 92.6 chr12 65022205 T 17 0 2 78 14 0 2 69 16 0 2 75 9 0 2 54 13 0 2 66 15 0 2 72 Y 258 0.117 0 -Contig43_chr12_66499742_66500010 121 G T 41.5 chr12 66499866 G 12 0 2 63 4 0 2 39 8 0 2 51 6 0 2 45 10 0 2 57 6 0 2 45 N 42 0.421 0 -Contig14_chr12_71364692_71365311 20 A C 103.0 chr12 71364712 A 7 0 2 48 3 0 2 36 5 0 2 42 1 0 2 30 2 0 2 33 3 0 2 36 Y 35 +99. 
0 -Contig37_chr13_15910164_15910426 245 G A 32.9 chr13 -1 N 3 4 1 41 4 0 2 39 3 0 2 36 4 0 2 39 3 0 2 36 10 0 2 57 N -1 2.159 1 -Contig107_chr13_26045881_26046290 341 C G 81.4 chr13 26046230 C 16 0 2 75 20 0 2 90 14 0 2 69 15 0 2 72 9 0 2 54 9 0 2 54 Y 51 4.510 0 -Contig251_chr13_28498333_28501066 864 T G 296.0 chr13 28499180 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 5 0 2 42 6 0 2 45 Y 9 0.068 0 -Contig154_chr13_36777857_36778736 356 G A 95.5 chr13 36778225 A 6 0 2 45 11 0 2 60 11 0 2 60 9 0 2 54 13 0 2 66 8 0 2 51 Y 59 0.192 0 -Contig37_chr13_42529793_42530857 150 G T 192.0 chr13 42529926 G 18 0 2 81 14 0 2 69 16 0 2 75 14 0 2 69 8 0 2 51 11 0 2 60 N 22 0.795 5 -Contig47_chr13_47045833_47046626 257 A C 28.5 chr13 47046097 A 13 0 2 66 10 0 2 57 17 0 2 78 20 0 2 87 15 0 2 72 9 0 2 57 N 129 0.468 0 -Contig42_chr13_47730018_47730856 254 A G 75.1 chr13 47730294 A 13 0 2 66 6 0 2 45 12 0 2 63 9 0 2 54 16 0 2 75 11 0 2 63 Y 630 0.049 1 -Contig55_chr13_53467708_53468101 221 T G 132.0 chr13 53467925 T 25 0 2 102 12 0 2 63 26 0 2 105 7 0 2 48 16 0 2 75 16 0 2 75 N 20 5.717 1 -Contig49_chr13_55103679_55105532 503 G A 76.0 chr13 55104178 G 21 0 2 90 19 0 2 84 18 0 2 81 20 0 2 87 8 9 1 89 17 0 2 78 Y 20 0.259 1 -Contig42_chr13_64785759_64786045 14 C G 22.8 chr13 64785772 C 2 0 2 33 2 0 2 33 4 0 2 39 7 0 2 48 8 0 2 51 2 0 2 33 N 527 +99. 
1 -Contig66_chr13_66021813_66022244 319 C T 125.0 chr13 66022136 C 11 0 2 60 16 0 2 75 15 0 2 75 12 0 2 63 17 0 2 78 8 0 2 51 N 14 0.055 3 -Contig48_chr14_11839435_11843272 3014 A G 163.0 chr14 11842446 A 10 0 2 57 8 0 2 51 13 0 2 66 10 0 2 57 5 0 2 42 10 0 2 57 Y 31 0.908 0 -Contig9_chr14_23353717_23354432 80 G A 61.3 chr14 23353797 G 3 0 2 36 6 0 2 45 11 0 2 60 8 0 2 51 4 0 2 39 2 4 1 35 Y 11 0.444 0 -Contig14_chr14_24131180_24133488 1633 G A 131.0 chr14 24132818 G 21 0 2 90 16 0 2 75 12 0 2 63 10 0 2 57 11 0 2 60 20 0 2 87 Y 36 0.347 0 -Contig28_chr14_26905747_26909514 975 G C 3.13 chr14 26906723 G 16 0 2 75 10 0 2 57 12 0 2 63 15 0 2 72 10 0 2 57 7 0 2 48 N 287 0.117 2 -Contig14_chr14_29616948_29618316 109 G A 80.3 chr14 29617053 - 17 0 2 78 16 0 2 75 16 0 2 75 10 0 2 57 17 0 2 78 19 0 2 84 Y 32 1.051 0 -Contig24_chr14_29728478_29728839 242 T A 107.0 chr14 29728724 T 2 0 2 33 12 0 2 63 10 0 2 57 12 0 2 63 5 0 2 42 9 0 2 54 N 70 2.712 0 -Contig76_chr14_30028102_30029179 1046 C T 38.5 chr14 30029169 T 3 0 2 36 6 0 2 45 9 0 2 54 7 0 2 48 9 0 2 54 8 0 2 51 Y 96 +99. 0 -Contig115_chr14_31417207_31417574 259 A G 12.1 chr14 31417454 G 13 0 2 66 15 0 2 72 21 0 2 90 12 0 2 63 13 0 2 66 9 0 2 54 N 28 5.379 2 -Contig70_chr14_46653662_46653790 111 G A 46.7 chr14 46653768 G 7 0 2 48 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 10 0 2 57 N 21 +99. 
2 -Contig43_chr14_49991855_49993511 918 A G 112.0 chr14 49992767 G 15 0 2 72 10 0 2 57 11 0 2 63 9 0 2 54 12 0 2 63 9 0 2 54 Y 6 0.314 1 -Contig64_chr14_56768376_56768902 473 C T 29.0 chr14 56768832 C 15 0 2 72 11 0 2 60 14 0 2 69 14 0 2 69 7 0 2 48 9 0 2 54 Y 91 8.281 0 -Contig60_chr15_18493036_18494316 150 G A 92.6 chr15 18493188 G 9 0 2 54 13 0 2 66 9 0 2 54 6 0 2 45 5 0 2 42 12 0 2 63 Y 45 0.125 0 -Contig213_chr15_19567788_19568626 196 A C 13.9 chr15 19567992 A 4 0 2 39 2 0 2 33 7 0 2 48 4 0 2 39 4 0 2 39 6 0 2 45 Y 111 0.043 0 -Contig59_chr15_22138344_22138535 120 G C 142.0 chr15 22138470 C 11 0 2 60 10 0 2 57 18 0 2 81 4 0 2 39 10 0 2 57 15 0 2 72 N 8 2.553 0 -Contig112_chr15_26772864_26773267 374 C T 21.6 chr15 26773244 C 4 0 2 39 4 0 2 39 5 0 2 42 2 0 2 33 4 0 2 39 3 0 2 36 N 18 +99. 0 -Contig24_chr15_26894765_26895003 155 G A 87.6 chr15 -1 N 6 0 2 45 5 0 2 42 7 0 2 48 4 0 2 39 4 0 2 39 2 0 2 33 N -1 0.178 0 -Contig2_chr15_33944796_33947182 1860 G A 99.5 chr15 33946654 G 10 0 2 57 11 0 2 60 16 0 2 75 14 0 2 69 14 0 2 69 16 0 2 75 Y 16 0.252 0 -Contig73_chr15_34690052_34691332 714 T C 130.0 chr15 34690769 T 7 0 2 48 7 0 2 48 17 0 2 78 9 0 2 54 9 0 2 54 4 0 2 39 Y 7 6.003 0 -Contig68_chr15_37747190_37747426 126 G A 130.0 chr15 37747331 G 14 0 2 69 14 0 2 69 11 0 2 63 19 0 2 84 13 0 2 66 21 0 2 90 N 229 0.255 0 -Contig35_chr15_41400484_41400672 160 A C 143.0 chr15 -1 N 1 0 2 30 2 0 2 33 0 0 -1 0 2 0 2 33 3 0 2 36 2 0 2 33 N -1 +99. 0 -Contig104_chr15_45106954_45107158 70 A T 64.4 chr15 45107015 A 6 0 2 45 6 0 2 45 19 0 2 84 7 0 2 48 7 0 2 48 3 0 2 36 N 202 4.319 0 -Contig119_chr16_6160274_6160477 180 G A 54.8 chr16 6160457 G 7 0 2 48 6 0 2 45 12 0 2 63 3 0 2 36 11 0 2 60 10 0 2 57 N 42 +99. 0 -Contig126_chr16_10611887_10612152 150 G T 145.0 chr16 10612037 G 14 0 2 69 9 0 2 54 11 0 2 63 8 0 2 51 8 0 2 51 11 0 2 60 N 15 0.104 6 -Contig114_chr16_12565220_12565676 10 G A 134.0 chr16 12565230 G 0 0 -1 0 2 0 2 33 2 0 2 33 0 0 -1 0 1 0 2 30 1 0 2 30 N 333 +99. 
0 -Contig43_chr16_20200090_20200514 70 A G 58.6 chr16 20200154 A 11 0 2 60 15 0 2 72 15 0 2 72 6 0 2 45 9 0 2 54 12 0 2 63 Y 2 0.466 1 -Contig60_chr16_28079136_28080263 588 T G 157.0 chr16 28079739 T 22 0 2 93 20 0 2 87 22 0 2 93 17 0 2 78 12 0 2 63 10 0 2 57 Y 105 5.999 1 -Contig70_chr16_33758668_33759655 104 A T 58.1 chr16 33758772 A 6 0 2 45 7 0 2 48 17 0 2 78 14 0 2 69 8 0 2 51 10 0 2 57 N 54 0.162 0 -Contig66_chr16_37935682_37935831 116 T C 99.2 chr16 37935802 C 12 0 2 63 6 0 2 45 19 0 2 84 12 0 2 63 13 0 2 66 17 0 2 78 N 266 +99. 2 -Contig16_chr16_40451506_40451643 84 A G 59.8 chr16 40451592 A 7 0 2 48 5 0 2 42 7 0 2 48 13 0 2 66 14 0 2 69 19 0 2 84 N 45 5.061 0 -Contig53_chr16_49888293_49888587 260 G A 108.0 chr16 49888550 A 4 0 2 39 1 0 2 30 3 0 2 36 5 0 2 42 2 0 2 33 2 0 2 33 Y 9 0.261 1 -Contig31_chr17_12128267_12129637 205 G A 90.5 chr17 12128484 G 7 0 2 48 6 0 2 45 6 0 2 45 11 0 2 60 7 0 2 48 4 0 2 39 Y 10 0.246 0 -Contig50_chr17_12247973_12249183 889 G T 47.6 chr17 12248878 G 0 1 2 9 8 0 2 51 9 2 2 21 7 2 2 21 15 0 2 72 0 3 0 9 Y 1 1.181 0 -Contig1_chr17_12979232_12980380 808 G T 12.3 chr17 12980028 G 18 0 2 81 12 0 2 63 21 0 2 90 13 0 2 66 22 0 2 93 18 0 2 81 Y 9 0.336 1 -Contig63_chr17_14186372_14186928 54 C T 70.7 chr17 14186427 C 6 0 2 45 2 0 2 33 5 0 2 42 6 0 2 45 3 0 2 36 3 0 2 36 Y 11 0.560 3 -Contig42_chr17_23434859_23438330 2100 C T 39.5 chr17 23436985 T 4 0 2 39 7 0 2 48 7 0 2 48 3 0 2 36 6 0 2 45 2 0 2 33 Y 25 0.344 0 -Contig63_chr17_23796320_23796814 220 A G 54.0 chr17 23796536 G 6 0 2 45 4 0 2 39 5 0 2 42 6 0 2 45 4 0 2 39 6 0 2 45 Y 139 0.067 1 -Contig76_chr17_24107434_24107834 316 T C 141.0 chr17 24107726 T 19 0 2 84 15 0 2 72 20 0 2 87 16 0 2 75 11 0 2 60 18 0 2 81 Y 30 0.175 2 -Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 -Contig59_chr17_26790302_26795045 287 C T 45.1 chr17 26790582 C 8 0 2 51 6 0 2 45 13 0 2 66 6 0 2 45 15 0 2 72 12 0 2 63 Y 75 
0.019 1 -Contig99_chr17_27018324_27019378 446 G A 31.1 chr17 27018776 G 14 0 2 69 12 0 2 63 14 0 2 69 10 0 2 57 9 0 2 54 11 0 2 60 Y 13 0.290 4 -Contig125_chr17_27739115_27739410 63 G A 107.0 chr17 27739177 G 8 0 2 51 11 0 2 60 16 0 2 75 8 0 2 51 4 0 2 39 15 0 2 72 N 100 0.819 0 -Contig115_chr17_37489899_37490101 159 G A 62.4 chr17 37490067 G 4 0 2 39 3 0 2 36 4 0 2 39 4 0 2 39 3 0 2 36 6 0 2 45 N 4 1.411 1 -Contig180_chr17_45154356_45154925 524 A G 146.0 chr17 45154886 G 7 0 2 48 9 0 2 54 7 0 2 48 9 0 2 54 4 0 2 39 8 0 2 51 Y 11 +99. 2 -Contig61_chr17_48221795_48223545 1404 T A 177.0 chr17 48223216 T 15 0 2 72 14 0 2 69 24 0 2 99 17 0 2 78 18 0 2 81 24 0 2 99 Y 161 0.633 2 -Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 -Contig229_chr18_3706523_3708577 1076 A G 83.9 chr18 3707630 A 11 0 2 60 13 0 2 66 26 0 2 105 11 0 2 60 15 0 2 72 17 0 2 78 Y 63 0.445 0 -Contig24_chr18_14049894_14050480 24 A G 123.0 chr18 14049918 A 5 0 2 42 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 5 0 2 42 Y 17 +99. 
0 -Contig30_chr18_18771753_18772121 39 C G 48.5 chr18 18771787 C 2 0 2 33 5 0 2 42 2 0 2 33 6 0 2 45 3 0 2 36 2 0 2 33 N 5 0.135 0 -Contig123_chr18_19916160_19916379 116 G A 79.2 chr18 19916272 A 14 0 2 69 12 0 2 63 14 0 2 69 6 0 2 45 11 0 2 60 10 0 2 57 N 26 0.172 0 -Contig82_chr18_27305489_27306229 566 C T 49.5 chr18 27306051 A 6 0 2 45 6 0 2 45 10 0 2 57 11 0 2 60 6 0 2 45 7 0 2 48 N 1 0.349 0 -Contig71_chr18_34324706_34326687 136 G A 151.0 chr18 34324841 G 9 0 2 54 9 0 2 54 17 0 2 78 8 0 2 51 11 0 2 60 10 0 2 57 Y 2 2.129 2 -Contig16_chr18_34672093_34673044 538 T C 58.2 chr18 34672635 T 8 0 2 51 15 0 2 72 16 0 2 75 15 0 2 72 9 0 2 57 18 0 2 81 Y 8 0.214 1 -Contig96_chr18_38492535_38493333 624 G A 119.0 chr18 38493162 T 17 0 2 78 12 0 2 63 13 0 2 66 16 0 2 75 8 0 2 51 15 0 2 72 Y 127 0.131 0 -Contig226_chr18_47753756_47754666 427 T C 21.1 chr18 47754215 T 10 0 2 57 4 0 2 39 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 42 0.522 0 -Contig170_chr18_49411558_49412230 94 C A 74.3 chr18 49411655 C 14 0 2 69 10 0 2 57 9 0 2 54 10 0 2 57 3 0 2 36 3 0 2 36 N 9 1.457 0 -Contig192_chr18_49419342_49420737 1058 C T 42.8 chr18 49420381 A 3 0 2 36 4 0 2 39 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 Y 34 2.107 2 -Contig64_chr18_55979770_55980315 49 G A 89.1 chr18 55979824 G 3 0 2 36 9 0 2 54 7 0 2 51 4 0 2 39 3 0 2 36 3 0 2 36 Y -1 2.124 0 -Contig20_chr18_58130301_58130735 112 A G 74.4 chr18 58130413 A 12 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 6 0 2 45 6 0 2 45 Y 10 0.290 0 -Contig146_chr19_5221790_5223013 143 A G 114.0 chr19 5221916 - 1 0 2 30 4 0 2 39 3 0 2 36 5 0 2 42 2 0 2 33 5 0 2 42 Y 12 0.870 0 -Contig13_chr19_7739961_7740118 26 C G 220.0 chr19 -1 N 3 0 2 36 1 0 2 30 2 0 2 33 3 0 2 36 1 0 2 30 2 0 2 33 N -1 +99. 
0 -Contig67_chr19_12398520_12399367 499 C T 161.0 chr19 12399017 C 10 0 2 57 11 0 2 60 20 0 2 87 14 0 2 69 24 0 2 99 8 0 2 51 Y 137 5.634 0 -Contig66_chr19_16285672_16287223 996 C T 190.0 chr19 16286674 C 9 0 2 57 14 0 2 69 16 0 2 78 17 0 2 78 8 0 2 51 22 0 2 93 Y 40 0.110 0 -Contig129_chr19_25541958_25542221 202 T C 68.1 chr19 25542154 C 11 0 2 60 19 0 2 84 10 0 2 60 17 0 2 78 9 0 2 54 12 0 2 63 N -1 2.551 1 -Contig152_chr19_34274440_34275622 1072 C T 48.0 chr19 34275509 T 1 0 2 30 2 0 2 33 1 0 2 30 1 0 2 30 4 0 2 39 5 0 2 42 N 71 0.309 0 -Contig29_chr19_37339947_37341911 1692 C T 211.0 chr19 37341631 C 15 0 2 72 20 0 2 87 11 0 2 60 15 0 2 72 3 0 2 36 12 0 2 63 Y 7 0.096 0 -Contig39_chr19_47709708_47711327 444 C T 36.8 chr19 47710148 T 10 0 2 57 4 0 2 39 8 0 2 51 9 0 2 54 6 0 2 45 6 0 2 45 Y 95 1.251 1 -Contig60_chr19_54013816_54014398 281 A G 138.0 chr19 54014103 C 6 0 2 45 15 0 2 72 7 0 2 48 10 0 2 57 15 0 2 72 10 0 2 57 Y 188 1.271 0 -Contig251_chr19_56559098_56559626 452 T C 3.36 chr19 56559549 T 12 0 2 63 13 0 2 66 21 0 2 90 15 0 2 72 14 0 2 69 11 0 2 60 N 1 0.117 0 -Contig50_chr20_12138509_12141975 3206 C A 248.0 chr20 12141763 C 8 0 2 51 15 0 2 72 14 0 2 69 6 0 2 45 10 0 2 57 7 0 2 48 Y 2 0.384 0 -Contig36_chr20_32631363_32632049 176 G A 24.1 chr20 32631526 G 7 0 2 48 14 0 2 69 19 0 2 84 14 0 2 69 15 0 2 72 16 0 2 75 N 50 1.150 0 -Contig39_chr20_36316398_36316498 57 C T 30.3 chr20 36316455 C 2 0 2 33 0 1 2 8 0 0 -1 0 0 1 2 10 0 0 -1 0 0 0 -1 0 N -483 +99. 
0 -Contig32_chr20_36468058_36468869 66 C T 40.4 chr20 36468127 C 6 0 2 45 3 0 2 36 4 0 2 39 5 0 2 42 3 0 2 36 4 0 2 39 N 59 0.281 0 -Contig24_chr20_38203888_38204900 834 C T 132.0 chr20 38204731 C 9 0 2 54 17 0 2 78 20 0 2 87 8 0 2 51 11 0 2 60 17 0 2 78 Y 14 0.397 0 -Contig79_chr20_44263127_44264103 456 G T 31.5 chr20 44263573 G 22 0 2 93 16 0 2 75 15 0 2 72 19 0 2 84 13 0 2 66 26 0 2 105 Y 8 3.250 0 -Contig26_chr20_45878482_45878787 197 A G 160.0 chr20 45878672 A 17 0 2 78 15 0 2 72 11 0 2 63 17 0 2 78 12 0 2 63 10 0 2 57 N 14 0.535 0 -Contig119_chr20_46550670_46551383 609 G A 139.0 chr20 46551277 G 7 0 2 48 17 0 2 78 19 0 2 84 20 0 2 87 9 0 2 54 15 0 2 72 Y 7 0.488 1 -Contig50_chr21_4178523_4178687 121 G A 362.0 chr21 4178640 G 8 0 2 51 14 0 2 69 5 0 2 42 3 0 2 36 11 0 2 60 4 0 2 39 N 392 0.483 0 -Contig103_chr21_10177255_10177765 121 G A 125.0 chr21 10177367 G 12 0 2 63 10 0 2 57 10 0 2 57 17 0 2 78 14 0 2 69 7 0 2 51 Y 37 0.213 3 -Contig1_chr21_10805534_10806399 766 A G 146.0 chr21 10806301 G 10 0 2 57 6 0 2 45 9 0 2 54 6 0 2 45 7 0 2 48 5 0 2 42 Y 20 0.319 0 -Contig46_chr21_21029492_21030645 443 C T 5.37 chr21 21029910 C 15 0 2 72 11 0 2 60 16 0 2 75 15 0 2 72 13 0 2 66 6 0 2 45 Y 96 3.737 0 -Contig129_chr21_31045749_31046924 381 A G 129.0 chr21 31046141 A 19 0 2 84 8 0 2 51 23 0 2 96 12 0 2 63 15 0 2 72 18 0 2 81 Y 69 0.028 2 -Contig23_chr21_31651123_31651986 840 C T 71.3 chr21 31651957 T 6 0 2 45 9 0 2 54 8 0 2 51 10 0 2 57 4 0 2 39 7 0 2 48 Y 105 2.977 3 -Contig64_chr21_43341847_43342031 84 T C 114.0 chr21 43341926 T 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 6 0 2 45 7 0 2 48 N 10 3.954 2 -Contig60_chr21_43475347_43475824 175 C T 8.05 chr21 43475551 T 6 0 2 45 7 0 2 48 13 0 2 66 6 0 2 45 14 0 2 69 14 0 2 69 N 45 0.058 0 -Contig64_chr21_45377513_45377872 19 C T 60.7 chr21 -1 N 3 0 2 36 2 0 2 33 1 0 2 30 0 0 -1 0 3 0 2 36 1 0 2 30 N -1 +99. 
1 -Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 0 2 66 Y 16 0.465 0 -Contig46_chr22_9416920_9417467 381 G A 145.0 chr22 9417259 G 10 0 2 57 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 7 0 2 48 Y 154 0.242 0 -Contig86_chr22_9440787_9441725 713 T G 119.0 chr22 9441488 G 6 0 2 45 12 0 2 63 10 0 2 57 11 0 2 60 13 0 2 66 16 0 2 75 Y 132 0.218 0 -Contig16_chr22_15636960_15637372 236 A C 9.79 chr22 15637192 T 4 0 2 39 5 0 2 42 12 0 2 63 7 0 2 48 6 0 2 45 11 0 2 60 Y 5 2.163 0 -Contig4_chr22_16114310_16114546 128 G C 101.0 chr22 16114432 G 10 0 2 57 13 0 2 66 20 0 2 87 20 0 2 87 16 0 2 75 9 0 2 54 N 19 0.526 0 -Contig23_chr22_34612023_34612568 167 C G 92.3 chr22 34612181 C 11 0 2 60 18 0 2 81 13 0 2 66 8 0 2 51 12 0 2 63 14 0 2 69 Y 7 0.409 0 -Contig4_chr22_38252245_38253712 799 A C 159.0 chr22 38253064 A 18 0 2 81 15 0 2 72 15 0 2 72 20 0 2 87 27 0 2 108 15 0 2 72 Y 90 4.330 0 -Contig122_chr22_48412466_48414788 1888 C T 125.0 chr22 48414355 T 16 0 2 75 15 0 2 72 16 0 2 75 14 0 2 72 12 0 2 63 7 0 2 48 N 42 0.122 0 -Contig77_chr22_49764414_49764875 353 C A 148.0 chr22 49764777 C 7 4 1 65 18 0 2 81 16 0 2 75 20 0 2 87 4 3 1 52 9 4 1 67 Y 12 0.941 0 -Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 -Contig348_chr22_62406104_62406495 189 C A 134.0 chr22 62406302 A 9 0 2 54 14 0 2 69 11 0 2 60 10 0 2 57 12 0 2 63 6 0 2 45 Y 5 0.912 0 -Contig133_chr23_3525134_3526502 1223 A G 201.0 chr23 3526387 A 11 0 2 60 13 0 2 66 23 0 2 96 21 0 2 90 13 0 2 66 10 0 2 57 Y 61 1.359 0 -Contig111_chr23_7058063_7058181 107 G A 108.0 chr23 7058162 A 8 0 2 51 8 0 2 51 7 0 2 48 2 0 2 33 5 0 2 42 6 0 2 45 N 3 +99. 
0 -Contig79_chr23_7844129_7844837 110 C A 141.0 chr23 7844237 T 13 0 2 66 15 0 2 72 17 0 2 78 12 0 2 63 15 0 2 72 16 0 2 75 Y 40 0.339 0 -Contig38_chr23_9201002_9201725 597 C T 155.0 chr23 9201609 T 17 0 2 78 8 0 2 51 13 0 2 66 5 0 2 42 11 0 2 60 7 0 2 48 Y 167 0.633 1 -Contig33_chr23_20672540_20674320 347 T A 91.4 chr23 20672885 A 11 0 2 60 14 0 2 69 15 0 2 72 7 0 2 48 12 0 2 63 18 0 2 81 Y 31 0.452 1 -Contig35_chr23_28447813_28449115 70 T A 21.3 chr23 28447881 T 9 0 2 54 8 0 2 51 10 0 2 57 9 0 2 54 10 0 2 57 12 0 2 63 N 251 0.163 1 -Contig51_chr23_30590939_30591162 140 C T 142.0 chr23 30591080 C 14 0 2 69 4 0 2 39 10 0 2 57 12 0 2 63 14 0 2 69 4 0 2 39 N 13 1.658 0 -Contig57_chr23_32216351_32216721 179 T G 143.0 chr23 32216534 T 15 0 2 72 15 0 2 72 23 0 2 96 13 0 2 66 16 0 2 75 15 0 2 72 N 32 1.387 1 -Contig93_chr23_35744841_35745791 40 A T 30.4 chr23 35744880 T 6 0 2 45 7 0 2 48 7 0 2 48 2 0 2 33 5 0 2 42 5 0 2 42 Y 50 2.173 0 -Contig99_chr23_42543966_42544147 14 G A 357.0 chr23 42543980 G 4 0 2 39 2 0 2 33 3 0 2 36 3 0 2 36 1 0 2 30 2 0 2 33 N 69 +99. 0 -Contig32_chr23_48285289_48286638 186 T C 176.0 chr23 48285470 T 18 0 2 81 12 0 2 63 16 0 2 75 13 0 2 66 9 0 2 54 9 0 2 54 Y 4 4.238 1 -Contig50_chr24_22515247_22516072 761 C T 243.0 chr24 22515981 T 11 0 2 60 10 0 2 57 8 0 2 51 9 0 2 54 18 0 2 81 8 0 2 51 Y 1 0.190 0 -Contig92_chr24_28935897_28936321 13 G A 47.1 chr24 -1 N 2 0 2 33 1 0 2 30 0 0 -1 0 0 0 -1 0 1 0 2 30 0 0 -1 0 Y -1 +99. 
2 -Contig84_chr24_29196623_29199644 466 C T 126.0 chr24 29197091 T 7 0 2 48 11 0 2 60 8 0 2 51 7 0 2 48 11 0 2 60 15 0 2 72 Y 42 0.215 0 -Contig35_chr24_30150986_30151507 492 A C 114.0 chr24 30151448 A 5 0 2 42 2 0 2 33 2 0 2 33 3 0 2 36 3 0 2 36 5 0 2 42 N 41 2.587 6 -Contig61_chr24_30465488_30465834 149 G T 68.2 chr24 30465637 G 13 0 2 66 4 2 2 11 18 0 2 81 11 0 2 60 11 0 2 60 9 0 2 54 N 99 0.105 2 -Contig145_chr24_34778364_34778898 163 T C 372.0 chr24 34778541 C 10 0 2 57 8 0 2 51 12 0 2 63 12 0 2 63 6 1 2 31 7 0 2 48 Y 40 0.037 0 -Contig34_chr24_36147443_36150244 2679 C T 140.0 chr24 36150125 C 13 0 2 66 7 0 2 48 14 0 2 69 14 0 2 69 10 0 2 57 13 0 2 66 N 282 0.099 1 -Contig164_chr24_46598127_46599206 84 C T 105.0 chr24 46598214 C 13 0 2 66 12 0 2 63 15 0 2 72 15 0 2 72 11 0 2 60 8 0 2 51 Y 22 1.262 1 -Contig144_chr25_4011170_4013134 541 A G 160.0 chr25 4011690 A 12 0 2 63 17 0 2 78 13 0 2 66 13 0 2 66 13 0 2 66 13 0 2 66 Y 5 0.087 0 -Contig81_chr25_6103472_6104760 699 G A 378.0 chr25 6104190 A 14 0 2 69 16 0 2 75 13 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 Y 33 0.789 2 -Contig152_chr25_7486442_7487609 75 A G 11.6 chr25 7486515 A 17 0 2 78 13 0 2 66 8 0 2 51 16 0 2 75 8 0 2 51 6 0 2 45 N 2 0.158 0 -Contig24_chr25_7695778_7698612 2714 C T 130.0 chr25 7698446 C 16 0 2 75 13 0 2 66 22 0 2 93 17 0 2 78 10 0 2 57 17 0 2 78 Y 27 0.346 0 -Contig89_chr25_8635170_8636009 586 G C 209.0 chr25 8635744 G 13 0 2 66 13 0 2 66 21 0 2 93 14 0 2 69 15 0 2 72 15 0 2 72 Y 14 0.067 0 -Contig77_chr25_10796299_10796481 2 T C 17.3 chr25 -1 N 1 0 2 30 0 0 -1 0 1 0 2 30 0 0 -1 0 0 0 -1 0 0 0 -1 0 N -1 +99. 0 -Contig73_chr25_14177327_14177474 125 A C 6.85 chr25 14177464 A 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 N 27 +99. 
1 -Contig59_chr25_18196776_18197707 785 G A 112.0 chr25 18197551 G 8 10 1 42 27 0 2 108 21 0 2 90 18 0 2 81 10 0 2 57 14 0 2 69 N 36 3.625 0 -Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 -Contig84_chr25_42407960_42408708 55 C T 119.0 chr25 42408013 C 6 0 2 45 9 0 2 54 11 0 2 60 9 0 2 54 7 0 2 48 8 0 2 51 Y 11 0.121 0 -Contig73_chr25_43562500_43564110 955 T C 52.1 chr25 43563469 C 9 0 2 57 4 0 2 39 6 0 2 45 5 0 2 42 7 0 2 48 10 0 2 57 Y 4 1.406 0 -Contig37_chr25_51074433_51074885 170 A G 102.0 chr25 51074589 G 11 0 2 60 7 0 2 48 6 0 2 45 15 0 2 72 9 0 2 54 7 0 2 48 Y 68 0.207 1 -Contig204_chr26_4311195_4311778 170 C T 16.9 chr26 4311363 T 20 0 2 87 8 0 2 51 13 0 2 66 18 0 2 81 11 0 2 60 14 0 2 69 N 35 0.085 0 -Contig122_chr26_7622321_7623491 106 C G 139.0 chr26 7622423 C 3 0 2 36 9 0 2 54 10 0 2 57 12 0 2 63 9 0 2 54 5 0 2 42 N 19 0.458 0 -Contig11_chr26_11062142_11062902 707 C A 108.0 chr26 11062836 T 7 0 2 48 8 0 2 51 16 0 2 75 10 0 2 57 6 0 2 45 14 0 2 69 Y -1 4.709 0 -Contig133_chr26_17695661_17696368 39 T G 98.7 chr26 17695700 T 10 0 2 57 3 0 2 36 11 0 2 60 9 0 2 54 2 0 2 33 1 0 2 30 N 85 3.402 0 -Contig157_chr26_23894107_23895229 25 C T 50.2 chr26 23894140 C 0 0 -1 0 4 0 2 39 2 0 2 33 4 0 2 39 3 0 2 36 3 0 2 36 Y 51 +99. 
0 -Contig146_chr26_26622638_26623906 574 G A 186.0 chr26 26623219 A 11 0 2 60 12 0 2 63 9 0 2 54 11 0 2 60 9 0 2 54 12 0 2 63 Y 1 0.318 0 -Contig8_chr26_27834126_27834326 140 G A 41.7 chr26 27834268 G 13 0 2 66 7 0 2 48 13 0 2 66 11 0 2 60 12 0 2 63 6 0 2 45 N 29 0.142 1 -Contig78_chr26_31128839_31129005 123 T C 145.0 chr26 -1 N 11 0 2 60 3 0 2 36 7 0 2 48 8 0 2 51 10 0 2 46 7 0 2 48 N -1 1.230 1 -Contig28_chr26_32935355_32935833 289 T C 77.9 chr26 32935638 T 15 0 2 72 22 0 2 93 15 0 2 72 9 0 2 54 15 0 2 72 17 0 2 78 Y 10 2.258 1 -Contig36_chr26_36606876_36607240 115 A T 139.0 chr26 36606979 A 1 0 2 30 7 0 2 48 14 0 2 69 13 0 2 66 9 0 2 54 3 0 2 36 Y 8 0.071 0 -Contig135_chr27_6853874_6854079 158 C T 116.0 chr27 6854032 T 18 0 2 81 19 0 2 84 13 0 2 66 7 0 2 48 8 0 2 51 11 0 2 60 N 4 0.060 1 -Contig47_chr27_11777710_11777915 25 A G 67.3 chr27 11777731 A 3 0 2 36 5 0 2 42 6 0 2 45 10 0 2 57 9 0 2 54 6 0 2 45 N 97 +99. 0 -Contig23_chr27_14633002_14633153 23 G A 128.0 chr27 14633023 A 3 0 2 36 4 0 2 39 5 0 2 42 5 0 2 42 3 0 2 36 2 0 2 33 N 240 3.881 0 -Contig31_chr27_14987233_14988055 630 A G 48.5 chr27 14987850 G 10 0 2 57 2 0 2 33 4 0 2 39 4 0 2 39 1 0 2 30 4 0 2 39 Y 9 0.089 1 -Contig29_chr27_15428166_15429413 380 T C 140.0 chr27 15428539 T 15 0 2 72 15 0 2 72 17 0 2 78 15 0 2 72 15 0 2 72 15 0 2 72 Y 47 0.916 1 -Contig31_chr27_19519489_19520891 129 G T 14.9 chr27 19519624 T 12 0 2 63 19 0 2 84 20 0 2 87 16 0 2 75 10 0 2 57 11 0 2 60 Y 48 2.756 0 -Contig64_chr27_34654435_34654621 132 C A 115.0 chr27 34654567 T 2 0 2 33 2 0 2 33 5 0 2 42 3 0 2 36 3 0 2 36 8 0 2 51 N 12 0.297 1 -Contig35_chr27_40596169_40596445 20 G C 133.0 chr27 40596189 G 8 0 2 51 3 0 2 36 4 0 2 39 2 0 2 33 4 0 2 39 4 0 2 39 Y 4 +99. 
1 -Contig85_chr27_45471750_45472022 211 G A 53.1 chr27 45471964 G 18 0 2 81 10 0 2 57 15 0 2 72 0 13 0 36 16 0 2 75 14 0 2 69 N 75 2.502 1 -Contig131_chr28_6481806_6483783 138 C T 36.2 chr28 6481953 C 12 0 2 63 12 0 2 63 20 0 2 87 11 0 2 60 10 0 2 57 12 0 2 63 Y 10 0.387 0 -Contig141_chr28_10027332_10028242 780 T G 74.8 chr28 10028095 T 10 0 2 57 11 0 2 60 14 0 2 69 10 0 2 57 7 0 2 48 9 0 2 54 Y 19 3.348 0 -Contig144_chr28_15468203_15470548 743 G A 20.0 chr28 15468942 G 13 0 2 66 12 0 2 63 10 0 2 57 11 0 2 60 16 0 2 75 7 0 2 48 N 14 0.053 0 -Contig47_chr28_21311718_21312366 541 G A 116.0 chr28 21312258 G 9 0 2 54 6 0 2 45 12 0 2 63 6 0 2 45 5 0 2 45 12 0 2 63 N 9 0.240 0 -Contig60_chr28_30197166_30197364 92 T C 164.0 chr28 30197258 T 10 0 2 57 13 0 2 66 15 0 2 72 16 0 2 75 12 0 2 63 11 0 2 60 N 369 1.139 0 -Contig201_chr28_36339953_36341322 260 C T 6.36 chr28 36340213 T 4 0 2 39 0 0 -1 0 2 0 2 33 2 0 2 33 3 0 2 36 4 0 2 39 N 4 0.183 0 -Contig175_chr28_36441165_36441915 68 T C 3.83 chr28 36441234 T 4 4 1 15 6 0 2 45 12 0 2 63 15 0 2 72 6 0 2 45 9 0 2 54 N 4 1.610 2 -Contig29_chr29_4726399_4727143 559 A T 163.0 chr29 4726955 A 15 0 2 72 18 0 2 81 18 0 2 81 16 0 2 75 11 0 2 60 14 0 2 72 Y 161 3.114 0 -Contig48_chr29_13129286_13130137 232 A G 92.2 chr29 13129514 G 13 0 2 66 11 0 2 60 19 0 2 84 16 0 2 75 11 0 2 60 17 0 2 78 Y 337 2.581 1 -Contig64_chr29_15736891_15737257 344 T C 40.4 chr29 15737233 C 1 0 2 30 0 0 -1 0 0 0 -1 0 2 0 2 33 0 0 -1 0 0 0 -1 0 N 58 +99. 
0 -Contig33_chr29_17000374_17000921 71 C T 48.6 chr29 17000441 - 4 0 2 39 9 0 2 54 12 0 2 66 10 0 2 57 7 0 2 48 4 0 2 39 N 26 5.491 0 -Contig34_chr29_17581796_17584016 2105 C T 126.0 chr29 17583890 T 14 0 2 69 11 0 2 60 18 0 2 81 12 0 2 63 10 0 2 57 10 0 2 57 Y 22 2.208 0 -Contig19_chr29_20976080_20977761 1007 G A 115.0 chr29 20977076 G 19 0 2 84 22 0 2 93 22 0 2 93 22 0 2 93 11 0 2 60 13 0 2 66 Y 4 1.915 0 -Contig51_chr29_21149853_21150467 266 C T 146.0 chr29 21150118 C 12 0 2 63 12 0 2 63 23 0 2 96 14 0 2 69 13 0 2 66 10 0 2 57 Y 4 0.051 0 -Contig1_chr30_5992217_5993068 106 C T 129.0 chr30 5992319 C 10 0 2 57 11 0 2 60 7 0 2 48 11 0 2 60 10 0 2 57 12 0 2 63 Y 76 1.079 0 -Contig1_chr30_8232878_8233406 402 C T 127.0 chr30 8233264 C 8 0 2 51 19 0 2 84 16 0 2 75 18 0 2 81 10 0 2 57 14 0 2 69 Y 358 5.283 0 -Contig108_chr30_9436961_9437520 546 C T 39.8 chr30 9437502 C 7 0 2 48 5 0 2 42 2 0 2 33 7 0 2 48 5 0 2 42 7 0 2 48 Y 64 +99. 0 -Contig165_chr30_25804389_25804926 190 T C 126.0 chr30 25804592 C 3 0 2 36 8 0 2 51 7 0 2 48 10 0 2 57 7 0 2 48 4 0 2 39 Y 113 0.329 0 -Contig193_chr30_27495616_27496125 434 C A 234.0 chr30 27496024 C 13 0 2 66 16 0 2 75 25 0 2 102 16 0 2 75 13 0 2 66 14 0 2 69 Y 76 2.621 0 -Contig114_chr30_33636712_33637208 34 C T 142.0 chr30 33636744 C 7 0 2 48 4 1 2 20 6 0 2 45 6 0 2 45 3 4 1 29 5 0 2 42 Y 14 8.028 0 -Contig38_chr31_5164423_5166573 2074 C T 134.0 chr31 5166501 T 13 0 2 66 10 0 2 57 17 0 2 78 11 0 2 60 17 0 2 78 10 0 2 57 Y 58 +99. 
0 -Contig6_chr31_9649308_9650149 431 G T 162.0 chr31 9649742 G 31 0 2 120 23 0 2 96 17 0 2 78 17 0 2 78 10 0 2 57 16 0 2 75 Y 98 2.200 0 -Contig85_chr31_12242872_12245082 38 G C 92.4 chr31 12242910 G 1 0 2 30 6 0 2 45 9 0 2 54 8 0 2 51 5 0 2 42 9 0 2 54 N 2 2.340 0 -Contig7_chr31_12384974_12386400 305 C T 69.6 chr31 12385267 C 6 0 2 45 10 0 2 57 11 0 2 60 11 0 2 60 9 0 2 54 12 0 2 63 Y 44 1.165 0 -Contig90_chr31_17267583_17267778 81 C A 143.0 chr31 17267665 C 20 0 2 87 6 0 2 45 14 0 2 72 22 0 2 93 17 0 2 78 15 0 2 72 N 7 0.565 0 -Contig68_chr31_20000241_20000597 215 C T 131.0 chr31 20000454 T 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 0 0 -1 0 Y 5 3.383 1 -Contig137_chr31_23357653_23358568 885 G A 119.0 chr31 23358545 G 5 0 2 42 3 0 2 36 3 0 2 36 2 0 2 33 3 0 2 36 4 0 2 39 Y 11 +99. 0 -Contig17_chr31_26433828_26434459 498 T C 9.79 chr31 26434322 T 18 0 2 81 10 0 2 57 15 0 2 72 13 0 2 66 16 0 2 75 15 0 2 72 Y 137 4.814 0 -Contig9_chr32_19479532_19479735 12 A G 20.7 chr32 19479544 A 1 0 2 30 2 0 2 33 1 0 2 30 5 0 2 42 3 0 2 36 3 0 2 36 N 17 +99. 0 -Contig30_chr32_25902721_25905783 208 C G 162.0 chr32 25902927 G 11 0 2 60 13 0 2 66 11 0 2 60 12 0 2 63 7 0 2 48 11 0 2 60 Y 145 0.322 2 -Contig7_chr32_27789513_27789926 20 G A 7.19 chr32 27789530 A 0 0 -1 0 4 0 2 39 4 0 2 39 4 0 2 39 2 0 2 33 6 0 2 45 Y 14 +99. 
0 -Contig42_chr32_38900713_38901320 320 A G 134.0 chr32 38901021 T 12 0 2 63 10 0 2 57 9 11 1 104 5 0 2 42 19 0 2 84 7 6 1 56 Y 71 0.165 0 -Contig18_chr33_22207246_22209159 1363 G T 51.5 chr33 22208619 - 16 0 2 75 8 0 2 51 11 0 2 60 10 0 2 57 15 0 2 72 12 0 2 63 Y 59 2.560 0 -Contig104_chr33_22483642_22484187 424 C T 140.0 chr33 22484054 T 13 0 2 66 16 0 2 75 9 0 2 54 15 0 2 72 13 0 2 66 10 0 2 57 Y 36 0.404 0 -Contig170_chr33_26189421_26189940 292 T C 98.4 chr33 26189703 T 21 0 2 90 13 0 2 66 15 0 2 72 13 0 2 66 19 0 2 84 13 0 2 66 Y 23 0.307 0 -Contig113_chr34_13341080_13341643 236 C T 90.7 chr34 13341316 C 4 0 2 39 2 0 2 33 8 0 2 51 4 0 2 39 8 0 2 51 3 0 2 36 Y 47 0.412 3 -Contig405_chr34_14415672_14415979 59 A G 36.2 chr34 14415731 G 8 0 2 51 2 0 2 33 8 0 2 51 6 0 2 48 3 0 2 36 7 0 2 48 Y 45 0.405 1 -Contig21_chr34_16422980_16425681 2009 G A 19.4 chr34 16424960 G 0 0 -1 0 0 0 -1 0 0 0 -1 0 5 0 2 42 0 0 -1 0 0 0 -1 0 Y 28 0.196 0 -Contig41_chr34_16544482_16545449 46 T C 102.0 chr34 16544523 T 5 0 2 42 11 0 2 60 6 0 2 45 0 2 0 3 7 0 2 48 8 0 2 51 Y 215 1.156 0 -Contig8_chr34_18474513_18475673 1122 C A 129.0 chr34 18475628 A 8 0 2 51 15 0 2 72 13 0 2 66 17 0 2 78 13 0 2 66 6 0 2 45 Y 61 0.123 2 -Contig152_chr34_31794848_31795540 242 G A 93.2 chr34 31795093 G 11 0 2 60 24 0 2 99 17 0 2 78 15 0 2 72 18 0 2 81 17 0 2 78 Y 123 2.780 0 -Contig28_chr34_41708848_41712034 1381 A G 78.2 chr34 41710232 A 11 0 2 60 17 0 2 78 15 0 2 72 16 0 2 75 15 0 2 72 14 0 2 69 Y 236 0.234 0 -Contig85_chr34_42798284_42800584 1845 C T 171.0 chr34 42800126 T 5 0 2 42 7 0 2 48 6 0 2 45 7 0 2 48 6 0 2 45 2 0 2 33 Y 5 2.787 0 -Contig47_chr35_3666773_3667898 348 G T 124.0 chr35 3667121 G 9 0 2 54 20 0 2 87 18 0 2 81 15 0 2 72 12 0 2 63 14 0 2 69 Y 285 0.235 0 -Contig195_chr35_15722500_15722741 205 G A 4.08 chr35 15722718 G 3 0 2 36 5 0 2 42 1 0 2 30 6 0 2 45 1 0 2 30 1 0 2 30 N 43 +99. 
0 -Contig101_chr35_19513178_19513697 62 C T 112.0 chr35 19513238 C 12 0 2 63 7 0 2 48 13 0 2 66 7 0 2 48 5 0 2 42 8 0 2 51 N 115 3.135 0 -Contig19_chr35_23887144_23888282 90 C A 10.1 chr35 23887242 - 3 3 1 12 4 4 1 19 8 6 1 37 4 3 1 11 8 3 2 7 9 3 2 11 Y 105 0.199 0 -Contig47_chr35_24382042_24382526 33 G A 87.0 chr35 24382076 G 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 4 0 2 39 2 0 2 33 Y 71 +99. 0 -Contig77_chr35_24796947_24797172 65 A G 52.1 chr35 24797009 A 7 0 2 48 5 0 2 42 8 0 2 51 6 0 2 45 12 0 2 63 10 0 2 57 N 11 1.401 3 -Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 -Contig5_chr36_4562983_4563634 343 C T 151.0 chr36 4563324 T 20 0 2 87 20 0 2 87 23 0 2 96 24 0 2 99 9 0 2 54 8 0 2 51 Y 40 1.169 0 -Contig75_chr36_7885319_7885588 53 G A 25.7 chr36 7885372 G 10 0 2 57 8 0 2 51 13 0 2 66 7 0 2 48 4 0 2 39 7 0 2 48 N 7 2.653 0 -Contig184_chr36_18956191_18958552 187 A G 11.5 chr36 18956371 G 10 0 2 57 11 0 2 60 21 0 2 90 14 0 2 69 7 0 2 48 4 0 2 39 N 278 1.434 2 -Contig12_chr36_21557176_21557828 513 T A 159.0 chr36 21557695 A 11 0 2 60 14 0 2 69 21 0 2 90 12 0 2 63 15 0 2 72 11 0 2 60 Y 55 0.222 0 -Contig2_chr36_22436067_22436794 653 C T 73.0 chr36 22436730 C 11 0 2 60 16 0 2 75 13 0 2 66 11 0 2 60 21 0 2 90 21 0 2 90 Y 9 0.534 0 -Contig133_chr36_32954045_32955409 136 A G 116.0 chr36 32954182 A 16 0 2 75 15 0 2 72 20 0 2 87 11 0 2 60 18 0 2 81 13 0 2 66 Y 74 3.772 1 -Contig53_chr37_6665763_6665919 116 C T 111.0 chr37 6665875 C 9 0 2 54 9 0 2 54 5 0 2 42 9 0 2 54 8 0 2 51 10 0 2 57 N 15 10.875 1 -Contig42_chr37_9589176_9591269 252 G A 25.1 chr37 9589430 G 10 0 2 40 13 0 2 66 18 0 2 81 21 0 2 90 9 0 2 54 17 0 2 78 N 67 1.170 2 -Contig2_chr37_17134963_17136513 1140 A C 158.0 chr37 17136092 A 14 0 2 69 24 0 2 99 17 0 2 78 16 0 2 75 15 0 2 75 13 0 2 66 Y 12 0.053 1 -Contig18_chr37_17147806_17149851 291 T G 112.0 chr37 17148084 T 4 6 1 45 16 0 2 75 17 0 2 78 14 0 2 69 22 0 2 93 13 0 2 
66 Y 41 4.442 0 -Contig64_chr37_17606895_17607534 565 C T 30.2 chr37 17607439 A 9 0 2 54 16 0 2 75 20 0 2 87 14 0 2 69 16 0 2 75 10 0 2 57 N 20 1.622 0 -Contig126_chr37_21587881_21590621 373 G T 132.0 chr37 21588256 G 11 0 2 60 11 0 2 60 23 0 2 96 12 0 2 63 8 0 2 51 18 0 2 81 Y 12 0.549 0 -Contig2_chr37_31197993_31198256 182 C T 39.6 chr37 31198171 T 6 0 2 45 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 12 0 2 63 N 2 0.595 0 -Contig46_chr37_31852376_31853555 825 A G 111.0 chr37 31853191 G 19 0 2 84 14 0 2 69 15 0 2 72 7 0 2 48 8 0 2 51 16 0 2 75 Y 17 0.128 1 -Contig7_chr38_12217200_12218387 1163 A T 44.4 chr38 12218353 A 11 0 2 60 13 0 2 66 17 0 2 78 10 0 2 57 11 0 2 60 11 0 2 60 Y 67 +99. 0 -Contig15_chr38_12282020_12282253 150 C T 156.0 chr38 12282164 A 17 0 2 78 11 0 2 60 19 0 2 84 14 0 2 69 5 0 2 42 14 0 2 69 Y 26 2.952 1 -Contig4_chr38_14807432_14807747 275 A G 36.5 chr38 14807715 G 1 0 2 30 2 0 2 33 2 0 2 33 4 0 2 39 1 0 2 30 0 0 -1 0 Y 28 +99. 1 -Contig6_chr38_16185744_16186110 325 A G 74.9 chr38 16186061 A 5 0 2 42 3 0 2 36 9 0 2 54 7 0 2 48 1 0 2 30 12 0 2 63 Y 40 +99. 0 -Contig265_chrX_2689247_2689484 114 C G 103.0 chrX 2689356 C 11 0 2 60 9 0 2 54 13 0 2 66 16 0 2 75 14 0 2 69 10 0 2 57 N 2 9.232 1 -Contig122_chrX_6026976_6027327 330 C T 79.4 chrX 6027303 C 3 0 2 36 3 0 2 36 3 0 2 36 4 0 2 39 3 0 2 36 6 0 2 45 Y 30 +99. 0 -Contig15_chrX_15659909_15660340 15 A C 14.9 chrX 15659924 C 1 0 2 30 1 0 2 30 3 0 2 36 6 0 2 45 2 0 2 33 0 0 -1 0 Y 216 +99. 1 -Contig12_chrX_23243561_23244412 479 C G 67.7 chrX 23244037 C 2 0 2 33 4 2 2 8 2 6 1 43 7 0 2 48 6 0 2 45 4 0 2 39 Y 208 1.620 0 -Contig113_chrX_26287829_26288398 385 C T 59.6 chrX 26288213 C 9 0 2 54 9 0 2 54 17 0 2 78 11 0 2 60 3 8 1 44 4 0 2 39 N 13 0.077 0 -Contig186_chrX_29118735_29118939 192 G A 7.01 chrX 29118931 G 1 0 2 30 7 0 2 48 4 0 2 39 5 0 2 42 8 0 2 51 4 0 2 39 N 50 +99. 
0 -Contig237_chrX_31256648_31257654 165 T A 246.0 chrX 31256814 T 7 0 2 48 23 0 2 96 19 0 2 84 17 0 2 78 14 0 2 69 8 0 2 51 Y 37 1.481 0 -Contig25_chrX_40729418_40730089 332 C T 31.2 chrX 40729745 C 0 0 -1 0 2 0 2 33 4 0 2 39 5 0 2 42 3 0 2 36 3 0 2 36 Y 34 0.212 0 -Contig90_chrX_57430715_57431566 548 C T 116.0 chrX 57431266 T 9 0 2 54 18 0 2 81 13 0 2 66 14 0 2 69 8 0 2 54 7 0 2 48 Y 261 0.154 1 -Contig133_chrX_84833782_84834125 182 G A 69.7 chrX 84833962 G 5 0 2 42 18 0 2 81 12 0 2 63 19 0 2 84 6 3 1 27 7 0 2 48 N 619 0.278 0 -Contig129_chrX_90586053_90586467 135 A T 120.0 chrX 90586195 A 1 0 2 30 6 0 2 45 8 0 2 51 5 0 2 42 1 0 2 30 2 0 2 33 N 637 0.245 0 -Contig125_chrX_93319363_93320877 349 A C 145.0 chrX 93319721 A 4 0 2 39 6 0 2 45 11 0 2 60 10 0 2 57 13 0 2 66 6 0 2 45 Y 59 1.686 0
--- a/test-data/test_out/add_fst_column/add_fst_column.gd_snp Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -Contig113_chr5_11052263_11052603 28 C T 38.2 chr5 11052280 C 1 2 1 12 3 2 1 10 5 0 2 42 2 1 2 13 3 0 2 36 8 0 2 51 Y 161 +99. 0 0.1636 -Contig215_chr5_70946445_70947428 363 T G 28.2 chr5 70946809 C 4 0 2 39 0 5 0 12 9 0 2 54 6 0 2 45 3 3 2 1 9 0 2 54 N 43 0.153 0 0.3846 -Contig132_chr7_20426224_20428145 1815 A G 28.3 chr7 20428041 A 11 1 2 43 12 0 2 63 19 0 2 84 23 0 2 96 14 0 2 69 10 0 2 57 N 11 0.264 0 0.0213 -Contig30_chr8_17147743_17147923 13 G A 105.0 chr8 17147756 A 1 3 1 19 1 0 2 30 3 0 2 36 1 0 2 30 1 0 2 30 3 0 2 36 N 6 +99. 0 0.4286 -Contig44_chr8_71186368_71188207 1455 G T 147.0 chr8 71187818 G 4 10 1 74 3 0 2 36 20 0 2 87 12 0 2 63 8 0 2 51 10 0 2 57 Y 88 0.036 0 0.4167 -Contig103_chr11_8844784_8845095 214 T G 135.0 chr11 8844993 T 1 1 2 12 10 0 2 57 5 4 1 26 2 3 1 13 2 7 1 34 1 1 2 13 Y 75 0.731 0 0.2101 -Contig37_chr13_15910164_15910426 245 G A 32.9 chr13 -1 N 3 4 1 41 4 0 2 39 3 0 2 36 4 0 2 39 3 0 2 36 10 0 2 57 N -1 2.159 1 0.2222 -Contig50_chr17_12247973_12249183 889 G T 47.6 chr17 12248878 G 0 1 2 9 8 0 2 51 9 2 2 21 7 2 2 21 15 0 2 72 0 3 0 9 Y 1 1.181 0 0.0150 -Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 0 2 66 Y 16 0.465 0 0.1429 -Contig77_chr22_49764414_49764875 353 C A 148.0 chr22 49764777 C 7 4 1 65 18 0 2 81 16 0 2 75 20 0 2 87 4 3 1 52 9 4 1 67 Y 12 0.941 0 0.0741 -Contig61_chr24_30465488_30465834 149 G T 68.2 chr24 30465637 G 13 0 2 66 4 2 2 11 18 0 2 81 11 0 2 60 11 0 2 60 9 0 2 54 N 99 0.105 2 0.0556 -Contig59_chr25_18196776_18197707 785 G A 112.0 chr25 18197551 G 8 10 1 42 27 0 2 108 21 0 2 90 18 0 2 81 10 0 2 57 14 0 2 69 N 36 3.625 0 0.1250 -Contig85_chr27_45471750_45472022 211 G A 53.1 chr27 45471964 G 18 0 2 81 10 0 2 57 15 0 2 72 0 13 0 36 16 0 2 75 14 0 2 69 N 75 2.502 1 0.3023 
-Contig175_chr28_36441165_36441915 68 T C 3.83 chr28 36441234 T 4 4 1 15 6 0 2 45 12 0 2 63 15 0 2 72 6 0 2 45 9 0 2 54 N 4 1.610 2 0.1667 -Contig114_chr30_33636712_33637208 34 C T 142.0 chr30 33636744 C 7 0 2 48 4 1 2 20 6 0 2 45 6 0 2 45 3 4 1 29 5 0 2 42 Y 14 8.028 0 0.0435 -Contig42_chr32_38900713_38901320 320 A G 134.0 chr32 38901021 T 12 0 2 63 10 0 2 57 9 11 1 104 5 0 2 42 19 0 2 84 7 6 1 56 Y 71 0.165 0 0.2821 -Contig41_chr34_16544482_16545449 46 T C 102.0 chr34 16544523 T 5 0 2 42 11 0 2 60 6 0 2 45 0 2 0 3 7 0 2 48 8 0 2 51 Y 215 1.156 0 0.1429 -Contig19_chr35_23887144_23888282 90 C A 10.1 chr35 23887242 - 3 3 1 12 4 4 1 19 8 6 1 37 4 3 1 11 8 3 2 7 9 3 2 11 Y 105 0.199 0 0.0051 -Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 0.0986 -Contig18_chr37_17147806_17149851 291 T G 112.0 chr37 17148084 T 4 6 1 45 16 0 2 75 17 0 2 78 14 0 2 69 22 0 2 93 13 0 2 66 Y 41 4.442 0 0.1304 -Contig12_chrX_23243561_23244412 479 C G 67.7 chrX 23244037 C 2 0 2 33 4 2 2 8 2 6 1 43 7 0 2 48 6 0 2 45 4 0 2 39 Y 208 1.620 0 0.0256
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/average_fst/average_fst.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,1 @@ +average Fst is 0.16461, using 21 SNPs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/coverage_distributions/coverage.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,39 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Coverage distributions Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 01:57:24 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="coverage.pdf">coverage.pdf</a></li> + <li><a href="coverage.txt">coverage.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Data source: sequence coverage</li> + </ul> + </div> + <div id="gd_misc"> + Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/coverage_distributions/coverage.pdf Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,363 @@ +%PDF-1.4 +%âãÏÓ\r +1 0 obj +<< +/CreationDate (D:20120403135724) +/ModDate (D:20120403135724) +/Title (R Graphics Output) +/Producer (R 2.11.0) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 6 0 R +/Resources 4 0 R +>> +endobj +6 0 obj +<< +/Length 7 0 R +>> +stream +1 J 1 j q +Q q 59.04 73.44 630.72 299.52 re W n +1.000 0.000 0.000 RG +2.25 w +[] 0 d +1 J +1 j +10.00 M +82.40 174.26 m +106.73 206.89 l +131.07 206.89 l +155.40 263.98 l +179.73 263.98 l +204.07 223.20 l +228.40 312.93 l +252.73 304.77 l +277.07 255.83 l +301.40 280.30 l +325.73 312.93 l +350.07 321.08 l +374.40 255.83 l +398.73 263.98 l +423.07 231.36 l +447.40 231.36 l +471.73 174.26 l +496.07 215.04 l +520.40 174.26 l +544.73 133.47 l +569.07 157.95 l +593.40 109.00 l +617.73 109.00 l +642.07 92.69 l +666.40 84.53 l +S +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +82.40 73.44 m 569.07 73.44 l S +82.40 73.44 m 82.40 66.24 l S +204.07 73.44 m 204.07 66.24 l S +325.73 73.44 m 325.73 66.24 l S +447.40 73.44 m 447.40 66.24 l S +569.07 73.44 m 569.07 66.24 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 79.06 47.52 Tm (0) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 200.73 47.52 Tm (5) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 319.06 47.52 Tm (10) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 440.73 47.52 Tm (15) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 562.39 47.52 Tm (20) Tj +ET +59.04 84.53 m 59.04 345.55 l S +59.04 84.53 m 51.84 84.53 l S +59.04 149.79 m 51.84 149.79 l S +59.04 215.04 m 51.84 215.04 l S +59.04 280.30 m 51.84 280.30 l S +59.04 345.55 m 51.84 345.55 l S +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 72.86 Tm (0.00) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 138.11 Tm (0.02) Tj +ET +BT +/F2 1 Tf 0.00 12.00 
-12.00 0.00 41.76 203.37 Tm (0.04) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 268.62 Tm (0.06) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 333.88 Tm (0.08) Tj +ET +59.04 73.44 m +689.76 73.44 l +689.76 372.96 l +59.04 372.96 l +59.04 73.44 l +S +Q q +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 348.69 18.72 Tm [(Co) 15 (v) 25 (er) 10 (age)] TJ +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 195.28 Tm [(Propor) -40 (tion)] TJ +ET +Q q 59.04 73.44 630.72 299.52 re W n +1.000 1.000 0.000 RG +2.25 w +[] 0 d +1 J +1 j +10.00 M +82.40 157.95 m +106.73 166.10 l +131.07 231.36 l +155.40 215.04 l +179.73 280.30 l +204.07 263.98 l +228.40 272.14 l +252.73 231.36 l +277.07 345.55 l +301.40 321.08 l +325.73 288.45 l +350.07 329.24 l +374.40 255.83 l +398.73 280.30 l +423.07 247.67 l +447.40 239.51 l +471.73 215.04 l +496.07 157.95 l +520.40 174.26 l +544.73 166.10 l +569.07 133.47 l +593.40 92.69 l +617.73 100.85 l +642.07 100.85 l +666.40 100.85 l +S +0.000 1.000 0.000 RG +82.40 141.63 m +106.73 166.10 l +131.07 182.42 l +155.40 182.42 l +179.73 231.36 l +204.07 198.73 l +228.40 206.89 l +252.73 263.98 l +277.07 263.98 l +301.40 263.98 l +325.73 239.51 l +350.07 280.30 l +374.40 198.73 l +398.73 304.77 l +423.07 231.36 l +447.40 247.67 l +471.73 239.51 l +496.07 239.51 l +520.40 215.04 l +544.73 198.73 l +569.07 231.36 l +593.40 149.79 l +617.73 166.10 l +642.07 166.10 l +666.40 100.85 l +S +0.000 1.000 1.000 RG +82.40 133.47 m +106.73 133.47 l +131.07 255.83 l +155.40 231.36 l +179.73 272.14 l +204.07 272.14 l +228.40 337.40 l +252.73 280.30 l +277.07 280.30 l +301.40 280.30 l +325.73 337.40 l +350.07 288.45 l +374.40 296.61 l +398.73 223.20 l +423.07 272.14 l +447.40 255.83 l +471.73 239.51 l +496.07 190.57 l +520.40 117.16 l +544.73 125.32 l +569.07 149.79 l +593.40 109.00 l +617.73 109.00 l +642.07 92.69 l +666.40 92.69 l +S +0.000 0.000 1.000 RG +82.40 157.95 m +106.73 190.57 l +131.07 215.04 l +155.40 288.45 l +179.73 231.36 l +204.07 272.14 l 
+228.40 272.14 l +252.73 280.30 l +277.07 296.61 l +301.40 361.87 l +325.73 329.24 l +350.07 329.24 l +374.40 296.61 l +398.73 272.14 l +423.07 215.04 l +447.40 239.51 l +471.73 190.57 l +496.07 157.95 l +520.40 166.10 l +544.73 125.32 l +569.07 100.85 l +593.40 92.69 l +617.73 109.00 l +642.07 84.53 l +666.40 92.69 l +S +1.000 0.000 1.000 RG +82.40 198.73 m +106.73 157.95 l +131.07 215.04 l +155.40 215.04 l +179.73 304.77 l +204.07 223.20 l +228.40 321.08 l +252.73 361.87 l +277.07 280.30 l +301.40 280.30 l +325.73 329.24 l +350.07 280.30 l +374.40 337.40 l +398.73 231.36 l +423.07 272.14 l +447.40 223.20 l +471.73 174.26 l +496.07 198.73 l +520.40 149.79 l +544.73 117.16 l +569.07 100.85 l +593.40 109.00 l +617.73 100.85 l +642.07 84.53 l +666.40 100.85 l +S +1.000 0.000 0.000 rg +0.000 0.000 0.000 RG +0.75 w +[] 0 d +642.24 362.16 8.64 -7.20 re B +1.000 1.000 0.000 rg +642.24 347.76 8.64 -7.20 re B +0.000 1.000 0.000 rg +642.24 333.36 8.64 -7.20 re B +0.000 1.000 1.000 rg +642.24 318.96 8.64 -7.20 re B +0.000 0.000 1.000 rg +642.24 304.56 8.64 -7.20 re B +1.000 0.000 1.000 rg +642.24 290.16 8.64 -7.20 re B +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 354.25 Tm (PB1) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 339.85 Tm (PB2) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 325.45 Tm (PB3) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 311.05 Tm (PB4) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 296.65 Tm (PB6) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 282.25 Tm (PB8) Tj +ET +Q +endstream +endobj +7 0 obj +4763 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +5 0 R +] +/Count 1 +/MediaBox [0 0 720 432] +>> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font <</F2 9 0 R >> +/ExtGState << >> +>> +endobj +8 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef 
/hungarumlaut /ogonek /caron /space] +>> +endobj +9 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 8 0 R +>> endobj +xref +0 10 +0000000000 65535 f +0000000021 00000 n +0000000164 00000 n +0000005129 00000 n +0000005212 00000 n +0000000213 00000 n +0000000293 00000 n +0000005109 00000 n +0000005293 00000 n +0000005550 00000 n +trailer +<< +/Size 10 +/Info 1 0 R +/Root 2 0 R +>> +startxref +5646 +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/coverage_distributions/coverage.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,18 @@ + + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 + PB1 2 6 10 15 21 25 32 39 44 50 57 64 70 75 80 84 87 91 94 95 + PB2 2 4 9 13 19 24 30 35 43 50 56 64 69 75 80 85 89 91 94 96 + PB3 1 4 7 10 14 18 22 27 33 38 43 49 52 59 64 69 73 78 82 86 + PB4 1 3 8 12 18 24 32 38 44 50 57 64 70 74 80 85 90 93 94 96 + PB6 2 5 9 15 20 26 31 37 44 52 60 67 74 80 84 88 92 94 96 98 + PB8 3 5 9 13 20 24 32 40 46 52 60 66 73 78 84 88 91 94 96 97 + + + 20 21 22 23 24 + PB1 97 98 99 99 99 + PB2 98 98 98 99 99 + PB3 90 92 95 97 98 + PB4 98 98 99 99 99 + PB6 98 98 99 99 99 + PB8 98 98 99 99 99 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/dpmix/dpmix.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,56 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>dpmix Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:22:23 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="dpmix.pdf">dpmix.pdf</a></li> + <li><a href="misc.txt">misc.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Data source: sequence coverage</li> + <li>Switch penalty: 10</li> + <li>Also analyze random chromosome: no</li> + </ul> + </div> + <div id="gd_misc"> + Populations +<ul> +<li> +Ancestral population 1 +<ol> +<li>PB1</li> +<li>PB2</li> +</ol> +</li> +<li> +Ancestral population 2 +<ol> +<li>PB3</li> +<li>PB4</li> +</ol> +</li> +<li> +Potentially admixed +<ol> +<li>PB6</li> +<li>PB8</li> +</ol> +</li> +</ul> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/dpmix/dpmix.tabular Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,78 @@ +chr1 0 125154818 0 PB6 +chr1 0 125154818 0 PB8 +chr2 0 85243509 0 PB6 +chr2 0 85243509 0 PB8 +chr3 0 92410450 0 PB6 +chr3 0 92410450 0 PB8 +chr4 0 75619257 0 PB6 +chr4 0 75619257 0 PB8 +chr5 0 90203461 0 PB6 +chr5 0 90203461 0 PB8 +chr6 0 74848993 0 PB6 +chr6 0 74848993 0 PB8 +chr7 0 55833450 0 PB6 +chr7 0 55833450 0 PB8 +chr8 0 71187818 0 PB6 +chr8 0 71187818 0 PB8 +chr9 0 39008708 0 PB6 +chr9 0 39008708 0 PB8 +chr10 0 59511126 0 PB6 +chr10 0 59511126 0 PB8 +chr11 0 53408638 0 PB6 +chr11 0 53408638 2 PB8 +chr12 0 71364712 0 PB6 +chr12 0 71364712 0 PB8 +chr13 0 66022136 0 PB6 +chr13 0 66022136 0 PB8 +chr14 0 56768832 0 PB6 +chr14 0 56768832 0 PB8 +chr15 0 45107015 0 PB6 +chr15 0 45107015 0 PB8 +chr16 0 49888550 0 PB6 +chr16 0 49888550 0 PB8 +chr17 0 61714821 2 PB6 +chr17 0 61714821 0 PB8 +chr18 0 58130413 0 PB6 +chr18 0 58130413 0 PB8 +chr19 0 56559549 0 PB6 +chr19 0 56559549 0 PB8 +chr20 0 46551277 0 PB6 +chr20 0 46551277 0 PB8 +chr21 0 43475551 0 PB6 +chr21 0 43475551 0 PB8 +chr22 0 62406302 0 PB6 +chr22 0 62406302 0 PB8 +chr23 0 48285470 0 PB6 +chr23 0 48285470 0 PB8 +chr24 0 46598214 0 PB6 +chr24 0 46598214 0 PB8 +chr25 0 51074589 0 PB6 +chr25 0 51074589 0 PB8 +chr26 0 36606979 0 PB6 +chr26 0 36606979 0 PB8 +chr27 0 45471964 2 PB6 +chr27 0 45471964 2 PB8 +chr28 0 36441234 0 PB6 +chr28 0 36441234 0 PB8 +chr29 0 21150118 0 PB6 +chr29 0 21150118 0 PB8 +chr30 0 33636744 2 PB6 +chr30 0 33636744 0 PB8 +chr31 0 26434322 0 PB6 +chr31 0 26434322 0 PB8 +chr32 0 38901021 2 PB6 +chr32 0 38901021 0 PB8 +chr33 0 26189703 0 PB6 +chr33 0 26189703 0 PB8 +chr34 0 42800126 2 PB6 +chr34 0 42800126 2 PB8 +chr35 0 25394646 2 PB6 +chr35 0 25394646 2 PB8 +chr36 0 32954182 0 PB6 +chr36 0 32954182 0 PB8 +chr37 0 31853191 0 PB6 +chr37 0 31853191 0 PB8 +chr38 0 16186061 0 PB6 +chr38 0 16186061 0 PB8 +chrX 0 93319721 2 PB6 +chrX 0 93319721 2 PB8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/dpmix/misc.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,11 @@ +state 2 agrees with: PB1 PB2 +state 0 agrees with: PB3 PB4 + +PB6: 360 SNPs where state 2 is as likely as state 0 +PB6: 12 SNPs where state 0 is more likely than state 2 + +PB8: 358 SNPs where state 2 is as likely as state 0 +PB8: 14 SNPs where state 0 is more likely than state 2 + +PB6: 0 = 83.7%, 1 = 0.0%, 2 = 16.3% +PB8: 0 = 87.6%, 1 = 0.0%, 2 = 12.4%
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/evaluate_population_numbers/evaluate_population_numbers.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,2 @@ +CV error (K=1): 0.07423 +CV error (K=2): 0.07708
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/extract_primers/extract_primers.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,1265 @@ +> Contig161_chr1_4641264_4641879 115 C T 0.323016 + + 1 TCCGAACCGCTAAATCCTGACGACTGTTCAGTGAGAACGGGnTTCCAGCTCAGTGGAGAC + >>>>>>>>>>>>>>>>>>>> + + 61 ACTCAGAGCTTATGTGATGCACCGTCGTGCCCGTGTCTGACTAAATGTGTTGCCAGAGAA + <<<< + +121 CAAAACGAAAGCCCCTATT + <<<<<<<<<<<<<<<< + +> Contig86_chr1_30984450_30985684 670 C T 0.031427 + + 1 TAATTCATGACGACTGCAGAAGGGCACTCAGAGGCAATTCTACTTGAGGATATTGTCTGG + >>>>>>>>>>>>>>>>>>>> + + 61 TATACTCTGTCCTTGCTCAGGACATCAGTGAGAACATAGAAACATTCACnTCCCCACACC + + +121 GAAAGCGTCTGTAGACCGGCCCACGGGCCGAAGTCTTTGCATTTCCTCTTGCCATGCACG + + +181 AGCATTCCCAGTGGCAATCAGGGGCCAGCCCTTCTGTTTGGCCTCTGCAAGCTTGTATCC + <<<<<<<<<<<<<<<<<<<< + +241 TTG + + +> Contig21_chr1_60697952_60699446 307 G A 0.507396 + + 1 TCTGGGGCCATGTTTCTGAAGTAAGGCTGTTTCTGCAGCCTTGCGGGCTGTGTCTTGCTC + >>>>>>>>>>>>>>>>>>>> + + 61 nCACCCCTTAATTCTTACCTGTAGGTGGTATTTGGTAGAGTGGAGTAAAACTGGAAACTG + << + +121 GTTCTCTGTGTTCCTGCATCT + <<<<<<<<<<<<<<<<<< + +> Contig64_chr1_87343284_87345672 163 T A 0.038702 + VspI + 1 ATGGCCAATTCTGGTTTAcGCATCATTGTTAACAACTCTTCCATTCATTCTCAGAATTTT + >>>>>>>>>>>>>>>>>>>> + + 61 CCCAATTCACATGATAAATTGTATGGTCACCTACcTACAACTAAACACTTAGTTTATTTC + + +121 TATTATTATTATTATTATTATTATTATTATTAnTAtTATTATTGAAATACATTTTTTTTT + + +181 CATAAACCGTTCACcCTTGTGAGAAC + <<<<<<<<<<<<<<<<<<<< + +> Contig20_chr1_110679280_110679687 181 C T 0.659726 + + 1 GAGCACTCAATGAGGGGTTCGACCCTTTGCAGACACAGCATGTAGGAGGAAGAAATGCAA + >>>>>>>>>>>>>>>>>>>> + + 61 cGGGGCACCCCTGCGGGGGCAGGCTTCCAGTTCAAACTGATCnGGTCTGGTCCTGGGGCC + + +121 GGGCCAAAGTTGTGGTTTCcCGCACTCAAGTCTCCAC + <<<<<<<<<<<<<<<<<<<< + +> Contig222_chr2_9817738_9818143 220 C T 0.092668 + SpeI + 1 AGATTTAGCTGGAGCATGCCTTTGCCCTTTTTAGCCTTTCCCTTTTACCTTTATCCTTCT + >>>>>>>>>>>>>>>>>>>> + + 61 TATTCTTGAAATGTTGAAATAGATGGAAGTATAGCAGCTATCTTGTCCCATAATGATGAA + + +121 AACCAGGTACAAAGTTGGTGAAAACTAAAAGAGAGGAGGAGCCTGGGTTCTTGGTGGCAT + + +181 
CATGAACACCTGCACnAGTCTAGCATGGTCTGTGCAAAATCTCCTGATCCAAGAAAAATA + + +241 TAAACATCCTTCTGTAGGGTTTTATTgCCTGAAGCAAAA + <<<<<<<<<<<<<<<<<<<< + +> Contig47_chr2_25470778_25471576 126 G A 0.289103 + Bsp1286I + 1 GCCAGGCGTCCCTCTTTTTGAGTTCtAATTGTGTACATCCAATCCCCATCTCAACAAATA + >>>>>>>>>>>>>>>>>>>> + + 61 GCTGAACCAGCTTCCTaTTTATTTGGTAGGTnAGCACTCTAGAAATTTGCTACACTGAAC + + +121 TCACCAAATTTATAATGTaAATTATGACCATTCTTTGCCATAATAATTTGGGGTAGGTCA + + +181 GATTTGGTTTTGGGGGCAGAAGAAATCATCATATCACAAGCATGTGACAGCTTCCAGCCC + <<< + +241 CATCTCAACTCCAAGAAATT + <<<<<<<<<<<<<<<<< + +> Contig6_chr2_56859179_56859956 671 T C 5.308026 + MspA1I + 1 TATCCCAAAGACGTGTGTCTCAAAGCCCTGAGGTTTACAGCCAAACATGATGGACTGCCC + >>>>>>>>>>>>>>>>>>>> + + 61 ATGACAAcGGATACAAATGCTAGCgTGGGTTTAATTATGCTAGAATTTTTATGATAATTA + + +121 TAATGATATTGTTATGAAGTATGCTAGGCTTTnAGCGGCTAGTCTCTAAACCTATTTTCC + + +181 tTATAAATCCTTTTATTTTTAGTGCACTATTTTATAGAATAAGAGGTTTTTCAGGAACAC + <<<<<<<<<<<<<<<< + +241 ATATATTGCATT + <<<<<<<<< + +> Contig163_chr2_76402959_76404830 221 C T 0.178077 + + 1 GCCCCTTCGAGTCCATCTTaCGCgCAGCAGCAGGAGGGATGGTCCCAACCACAAACCTAC + >>>>>>>>>>>>>>>>>>>> + + 61 CCGCTGCCTGAACGCTTnAAGTGCCCTCCGAAGAAAGCCCAACTCCACAGCCTGGCAACT + + +121 GAGGTCCTTGTGATCTTAGCTTCCTCTGCCCCACTCCACAGCTCAGCCTCACCgGACTCC + + +181 CGAGCTCCTTAAAGGAGCCCCCGAGCCCCCGCACATGCTGTTCCCTGTAACCGGGTACTC + + +241 CACGGCTCGTCTGTCCTTGGAGGCTCAGCTG + <<<<<<<<<<<<<<<<<<<< + +> Contig56_chr3_17326225_17327548 387 G C 0.224947 + AgeI,HpaII,MspI + 1 CAAAGGCAGTGATATGGGAGTGGAATGGAGAGGATGGGTGCCCCAGACTGGGTGCAGATC + >>>>>>>>>>>>>>>>>>>> + + 61 TGTTCTATCTGGTGTTTGGTGGCTGACCATACnGGTGAGAAGAAGTGTcCAGGTTTCTGG + + +121 CTTGATGATGCCGACAGTTATGGCAGGAAATGCTGAAGGGGTGCACATGAGCTCCTGTTC + + +181 ATTCTTCACTCTTCCTCTTCTACCTCCAACCTTGCTACCTGTGTGTACCCGACTC + <<<<<<<<<<<<<<<<<<<< + +> Contig108_chr3_46210055_46210874 367 A G 0.027845 + + 1 TTCACTCACCTGCTTCCCTGCTAACTGTCACCGCCCTCCCAATGCCTTAAACCAGCTTAG + >>>>>>>>>>>>>>>>>>>> + + 61 AAACACAAAATTTAAAAAACATTATGTTGAGACAAAAATATGTATAACCTGGAATATTGA + + +121 
ATAACAAAATGAAAGGGAAAATGATTCAAGAACACTTGGATAAGGAAAACTACAAATATT + + +181 nAAGATGTACCTTTGAACTTCCTATCACTGAAAGCAACCATGGAACCAGTACAATGTAGA + + +241 CCTTCTGATCTGACTTTCTTTTGTCTCTTGCTGCTGGGAAGTAGAATGCCCC + <<<<<<<<<<<<<<<<<<<< + +> Contig1_chr3_51588422_51589409 926 A G 1.147200 + + 1 AGATTATGGCCTGTGTTTACcCCAGCCTcGCAGAACATTTTACTGGGGACACCTGCCAGG + >>>>>>>>>>>>>>>>>>>> + + 61 TGGCAGATCAGAAGCCCGTGAGGCAGCCAGCCAATGGGAtGGCCAAAACCTAGGGCTTCG + + +121 TAnGGGAGGGAGATGTTTTCCTCgTCCCTCT + <<<<<<<<<<<<<<<<<<<< + +> Contig65_chr3_80727952_80728283 39 T C 7.077725 + + 1 CAAAGGCTTACTTTTTaGATCAACACTCTAAATTCTTAAnAAACAACAAAGCCAAATTTT + >>>>>>>>>>>>>>>>>>>>>>>>>> + + 61 CCTATATCATTGAGTAGTTGATACGTCTTTGGTTTTGCGCTAGCAGT + <<<<<<<<<<<<<<<<<<<< + +> Contig134_chr4_12145648_12148225 1326 C T 0.079565 + BalI + 1 AACCCAGAtCAGAAACGTCCCATGGCTAGTCATCTTCCTACACAGACTTCTgAGAGCCAA + >>>>>>>>>>>>>>>>>>>> + + 61 GCATCGTCAAcCGGCCAtTCTnGGCCATTCTCCCGAGCAGATGCTGCCGGGATAATCTGC + + +121 AGCATGAAGCCCTCCCTCGGGGGAGACCCGACcgGGTCCACACAGGTCTGTcTAGC + <<<<<<<<<<<<<<<<<<<< + +> Contig19_chr4_26233601_26233991 146 G C 0.163005 + DpnI,MboI,Sau3AI + 1 AATTTGGCTTCCTCTGGAGTtGTCCCTTAATGCTAGGTATCAAGTGCTGACAGGCCACAG + >>>>>>>>>>>>>>>>>>>> + + 61 ATnAGGGTAACACATGATTACAGGGCAACACACTGTAACACGTATTCCCTTGCCTTGTCT + <<<<<<<<<<<<<<<<<<<< + +121 T + + +> Contig17_chr4_61310346_61311158 267 C T 0.097708 + + 1 TATTCCAGACCAACCAAAAGGTCTAAGGAATAATAGAAGCTTCACCCACAGACCTGCCAC + >>>>>>>>>>>>>>>>>>>> + + 61 CCAACTTGAGAAACAGCACTTGCTTCCTCATAGAGTCGAAACGTCTTCGGTGGGTCCCCT + + +121 CCTGAAGCATCACCGCTACCTTTCCTCTTGGGAGTCACTGCCACCCnGAACTTGTTGCTG + + +181 CTTATTCTCTTTTATTTTTCTTGTTTTTGAAAGAACCCTGTCTTGGGTGTTAGGATAC + <<<<<<<<<<<<<<<<<<<< + +> Contig31_chr5_4734956_4736547 1166 C T 0.020932 + + 1 TGTTCTGCCATGCACACTTCTTCAACCCTTCAACCTGTGGGAGTCACCTCACATTCCCAC + >>>>>>>>>>>>>>>>>>>> + + 61 AGcGAATGGAATATCTATCTATCTgnCTTTAGGGATTTGTTACGTTTTCTTTTTCTTCCT + + +121 TTTCCTTCCAATATCTTAATGGGCAATTTTGTGGACAGTTGATAGAGACAACGTCAGGAG + <<<<<<< + +181 CTGTTGGCCTAGTAAA + <<<<<<<<<<<<< + +> 
Contig6_chr5_26899813_26900498 97 A C 7.369943 + AvaII,Sau96I,SinI + 1 AACTGAAAGTGAGAATTCTTTGTATTTGCTAGTCAAAAGGATTTCTAAGTCAAAAAAGTA + >>>>>>>>>>>>>>>>>>>>>>>>>> + + 61 ATTTGGGAnCATTAAGTCATATTTATAGACTAAAATTTCATTCCTAAAGACAATTTAGTA + + +121 AAAATGCTAGGCTTTCTAGAAATTTAACCTAACATAAAAAATTACAGTAAGTTTGCTAAA + + +181 GAATCACAGAGTTGACTGACAGTTTCCCAGGTTA + <<<<<<<<<<<<<<<<<<<< + +> Contig45_chr5_50892738_50892968 169 C A 0.496871 + + 1 TGAAAGGGGCACTGGGAATTATCAGAACCTTCTGGGTAATTAAACTGGGGAAAGCATAAT + >>>>>>>>>>>>>>>>>>>> + + 61 ACCATTTAGAAAAAGTTCAAGTGAGTCTTTTCCTTATTCTCCCnTGTACCCAGAAAAACC + <<<<<<<<<<< + +121 TGGACATGGTAC + <<<<<<<<< + +> Contig45_chr5_76133561_76134403 388 A G 0.038045 + + 1 CATGAGCATGCTGTCTGCACAAtGGGAGCACCCGTGATGTGAGAGTAGCCAGGCCACCCT + >>>>>>>>>>>>>>>>>>>> + + 61 GGCTTGAGTGCTTTGTCCAAAAGGCACAATGGGAACTACACAGAAACAATCAGATTCACT + + +121 GCCTTCGAGGGTTTGAAGAAGACAGCTGAAGAGTAGGAGGTAGAAnCAAAAAGGCATGAG + + +181 AGGGGGAAGCAGAGGCTGCAAGACATGAGCTGGGCAGTACTGACgGGCCACACAGAGCAC + < + +241 TGGAGACAAGGTCAGGAGCCCT + <<<<<<<<<<<<<<<<<<< + +> Contig111_chr6_5821219_5822519 1060 A G 0.230765 + AvaI + 1 CGTCAGAGCTGTCTTCCCTCCAGCCAGAGGGGCCCTGAGAAGGAAGGGGGCTGAACCCAG + >>>>>>>>>>>>>>>>>>>> + + 61 GCgCCAGCCCAAGCTGCAGCGTGATCTGGGGGTGAGGCCCCCCGCTGCACAGGGGGCACG + + +121 GGGGTTCGGGCAGAGATCGGCTACCCATGGCCGGCGAGGCCACAgTGGCAATGGGCAGCC + + +181 AGCCTCCGACCAGCcGCCCCCnAGCTGCCTATTTAAGTCAGGAGCTTCTCCTTCCCgTGG + <<<<<<< + +241 AAGTAGAGGACAAATT + <<<<<<<<<<<<< + +> Contig102_chr6_30271329_30271577 39 T G 1.158547 + + 1 TCTTCCTTTATGCATCAGGGCAGCACCCTGGGGAGAAGnGGGGGGGACAcGTGTGTCCTG + >>>>>>>>>>>>>>>>>>>> + + 61 GGGAAAGGGGTGTTCCCACTCCCTGCAATGCCTTCCCCCGCCCAGACCAGCAGTTCTCAG + + +121 TCTTGACTGCATGGACTCTCCTGGAAGGCTTTAAAAAATGTGGAGGCCGAGGCTTACCCA + + +181 tGACGGTTCTGACTGAATTGCTCTGGAGTAGGGCTTAGGCACTG + <<<<<<<<<<<<<<<<<<<< + +> Contig112_chr6_51024554_51024851 100 A G 4.286925 + + 1 CTTCATCATACCTATCATTGCCTATCGTTATACTATAGAGGTATTGTTCATTCTTTTTTA + >>>>>>>>>>>>>>>>>>>>>>>> + + 61 
TAGACTCATTGAGTAAAACTCAGGnCATGAGGGAAGGAACTTTGTCTCTTGTGCAATTCC + + +121 CTATCCTCAGTCCTTAAATATATGTATGCTAcCCAATAGGCACCAAATAT + <<<<<<<<<<<<<<<<<<<< + +> Contig84_chr7_6648683_6650255 1297 G A 0.165637 + + 1 GTTAGTTGTGAACACTCCCCAGGTAAACTGGTGTAACTCTTGGGGCAAAGCATGGAGTCT + >>>>>>>>>>>>>>>>>>>> + + 61 ACCCAAAAATGTAGAATTCTGCAGAGACAGCTGTTTCTTGGTTGGGTTTCTAGACCAGAA + + +121 AATAGAAGATTATAATTATGGGTGGGAAATATATGTGCAAAAAAGTATAAAAGAAGAGGA + + +181 ACAGAATAAAnGGAAATGGAAATGTTTGTAATTGATAGGGATGTGGATGTAAATGCCTGG + <<<<<<<<<<<< + +241 CAGAGAGGAGG + <<<<<<<< + +> Contig206_chr7_26281823_26282074 103 C A 0.947486 + NheI + 1 ATCCACATTCGCACAGCTCCTAATATAATATTTCATTGTTAAAATACTTCTGATTGGCCT + >>>>>>>>>>>>>>>>>>>> + + 61 AGGACATATTTTTACAACTGCCTTGACTTCAATTGCTAGnAGTAGCTTGCCAAAGAgGTG + + +121 CTTTAATAAAGGAAATTAACTTCTTTTAATATGTTGACTGATATACCAAGGTTTTAGTGC + + +181 TATTAGTTTACCTTCCCCAAAAGTGCTTA + <<<<<<<<<<<<<<<<<<<< + +> Contig38_chr7_50681997_50682600 42 T C 0.145997 + + 1 TAGAGCTCTCAGCATCCAAGCAGAATCTACTGGGTCTGACTGnGTTCTGCTCTGTCACTG + >>>>>>>>>>>>>>>>>>>> + + 61 GAATGACATTTCATTGCAGAGTACTCCTGCAGTACAACCAGGGCACAGCCTTTAAATTGA + + +121 CCATGTCCCCTGGTCTaCTCTGCTGAGCTaTGCACGGGTCCCTTCTGGTTCAAACACAGA + + +181 CTGATACAGCTCAGATGGAAGGGAGGCAGTTGCAGAGAAACAAA + <<<<<<<<<<<<<<<<<<<< + +> Contig91_chr8_12804505_12805470 409 C A 0.175272 + + 1 CTGTTTTCAGGGGCTACCTGCTATCTCCAGAACATGCCTGGCTCTCCTCCAAACACTGTT + >>>>>>>>>>>>>>>>>>>> + + 61 CAAnCTGACCAAAGCAGAGAGCTGTATATGGACCACACATACCAAAAAAAAAAAAAAGAC + + +121 AGTCCACACCCTCTGTATAATTATATGGTACAAATAATAGAGTTTTTGTTAACTACCAGC + + +181 TCTTTTTACAAAGCCTATCAAgTATCATAGACAGTATAATGCTGTGATTGCATCTGTGAA + <<<<<<<<<<<<<<<<<<<< + +241 CC + + +> Contig8_chr8_27811135_27812620 333 C T 0.272485 + + 1 CTTCAAGGAAAGGAGGCAGTTTGGACAAGTCAAAAAAATCCCAAAACtTTGTACTATATA + >>>>>>>>>>>>>>>>>>>> + + 61 AATCTGGCATATTTGTTGATGACanAATTGAGTTAGAAGCAAGAGTCAGAAGCTGACTTT + + +121 CATGCTGTTTTTCTGTTGTTTTCTGCGGCTCCCCTATGTACTAGTTCTCTTCCgGTGTGC + + +181 TGACAACTTCCAACTTcTCATAcCCTCTGCATTTCACGTTCTGC + <<<<<<<<<<<<<<<<<<<< + +> 
Contig17_chr8_57490059_57490498 69 G T 0.522227 + BglII,DpnI,MboI,Sau3AI,XhoII + 1 CACCAGAAAACAGGCATGGAACAGATTCTTTCAnATCTTTAAGAACAAACCAGTCCTGCT + >>>>>>>>>>>>>>>>>>>> + + 61 GACACATAGATTTTTGGACTTTTGGCCTCTGTAACTGTGAGAATAAATTTCTATTTTAAG + + +121 CCATCTACTTTGTAGTAATTTGTTATGGCAGCCCTGAGAAATTA + <<<<<<<<<<<<<<<<<<<< + +> Contig73_chr9_29451535_29452248 616 A G 0.448230 + Eco47III,HaeII + 1 ACCCAAGAGTCTGAGAGGCCCAGAGGCAGCTGGAGGCTGGAGGAGTCCCaCAGGCAAACC + >>>>>>>>>>>>>>>>>>>> + + 61 CTCCATTCCATGCGCCCCAGGGAGGCCAGGAAATCAGCnCTCCCAGGAGCAGGGAAGCAG + + +121 CAGTCCCTGGCATTGCCAGGGCAAGTGGCCACTCAGGGGAGAAAGGGGTGAGCTGGGGAG + + +181 GGGGAAGAGGGGAGGGGAGGGAAGGCAGAGACGAAGAGAA + <<<<<<<<<<<<<<<<<<<< + +> Contig96_chr9_39008495_39009278 215 A C 0.426539 + SspI + 1 TGGCAAACTCCTTGTGAATGCCACTACACTTTCTGGTCTCTGTATGTAATGCTAGATATT + >>>>>>>>>>>>>>>>>>>> + + 61 ACTGACACTTACcGCTACAAAGGCAAGACAAGCAAGACAACTGACATACACCCAgGTATG + + +121 GATCTATGAAGGGAGCTCCTTCTGCTAGAAAACAATATGTAAnTATTTACaTAACACCTA + + +181 CAATTCTAAATGGGTAGTTTCCCACATGTGAGATTACATCTTCAAGAGCCAAAGGACAAT + <<<<<<<<<<<<<< + +241 TTGTGCATC + <<<<<< + +> Contig22_chr10_15505382_15505589 172 T C 2.860867 + + 1 CGCAGGCGCCCCAATTATTCTTAACTCCTTATCAAAAGTTTTCCTAATTGAAACTTAAGC + >>>>>>>>>>>>>>>>>>>> + + 61 ATCACCTGTTTATTTCCTCTAAAATAAATGTATACATATAGAATTTCAGTAAGATAATGT + + +121 CTCAAAGAAGATGATAGCCATGGGAGAGGCTTATATGTACTTCnTATAATAAACAACGTC + <<<<<<<<<< + +181 CAGGTGTGATATT + <<<<<<<<<< + +> Contig69_chr10_40547265_40548153 371 G A 0.137642 + Bsp1286I,CfoI,HaeII,HhaI + 1 AAGGGGAAGAACTGAAGCGAGTGAGAAGCACGGAAGGACTTTTAGGTTTACAGCTGGGGT + >>>>>>>>>>>>>>>>>>>> + + 61 CACTGGTCTTCGCTATGGATGCCTCTCTTAAAGGAAAGACTAATTCTCTGTGGGTACTGA + + +121 AGGTGgGAGATGAATGTAGATGGGCnCTCGCATGTGTCAATGCTGACGGCTTGGTGAGAG + + +181 GTTTGGTGCGAGGCCAAAAAGGCgGGGATgAGAGAAGGATGACCTAGGGAGACTGCAGGG + + +241 TATTTAAAAGTTTGGGTCCAATTTTTCTCAAAGTGTGGCCAGTGCAC + <<<<<<<<<<<<<<<<<<<< + +> Contig9_chr10_51475063_51476054 770 C T 0.393903 + + 1 GTCTTCCTTCTAATCCCCaAGCcGTGAGAAGCTGTCTGAGCGCTCCTTGCTGGGCGTCCC + 
>>>>>>>>>>>>>>>>>>>> + + 61 TGCATGCCTGTACTGGGGCACACCTACGCCCTGGGTCCTGCTnCTGAAACGGTGTCTCAT + + +121 TTCTGTAATCGCTCCAAGCTTAATGGCTCTCAGCCTTGTGGGTTGCAGTGGAGAGAAAGC + <<<<<<<<<<<<<<<<<<<< + +181 ATT + + +> Contig72_chr11_7142765_7143772 146 G A 1.137400 + + 1 GGTGTAGTGAGGCTTCCACGAGCAGCCAGGCTTACAAACTCATCCTTAGCCTAAAAACTC + >>>>>>>>>>>>>>>>>>>> + + 61 CACnAAGTCAAGTATCTTGTGGGTGTTGAAAACTGTTCCACTCTGCAGAGCACCTCTATA + + +121 TGAAGTAATAATCATGGTATAATGTCCTTCTTCACATACCTGCCAAGAT + <<<<<<<<<<<<<<<<<<<<< + +> Contig7_chr11_40017076_40017630 352 C T 0.336170 + + 1 TCGGTCCTTCCTTGATCACATCTCCATGATCCTCCCACTGTTACTTGGAGGAGAATTGTT + >>>>>>>>>>>>>>>>>>>> + + 61 GCTTCCACAAATCAGATCTCTTTATTTTTCATTTATTCAACAAATGTGGACTGAGCTCTT + + +121 TGtATAGTACATTCTGTGGGCACTATTCACTAGACACACTGTAAACACTTCTGCTTCCTG + + +181 ACTTTGTTCAGATCTACCCCCnTGCCTGATCTGCCCTCCCCACCTGGTTTTCATCTCAGG + + +241 CTTAGGTCAAGCCTCATtTGCACTTC + <<<<<<<<<<<<<<<<<<<< + +> Contig16_chr11_53408448_53408790 187 A G 1.366749 + + 1 ATATTGCCAGTTTTAATGGGTGATATTTAGTCCTCCAATTAGACCTCTTTAGTGCATTGG + >>>>>>>>>>>>>>>>>>>> + + 61 ATACCAGTGAGCAATCATTCTGACAnAATTTCTGCTGCCTTGATTTTTGTGACAA + <<<<<<<<<<<<<<<<<<<< + +> Contig21_chr12_18403415_18404381 586 G T 0.068025 + + 1 AGTTCCAATGTCAGAGTCCCTCCCTCTACCTCCTATCCAACCcGCTACTTTTTTTnTTTT + >>>>>>>>>>>>>>>>>>>> + + 61 GTTTTACAACAAAAATAAACCTTCTTGTAACAATTCCAACAATTACAAAATAGAGTAAAA + + +121 TGTTTAAGTCTCTACCTAAACACACTCATCCTCAGAGAAACTCACAGGTAATTTCGGTtC + + +181 GTATCTTCCCAGACCCTCTTCTCAGcTTTCACACATACTACATACACATGAACTTCGAGC + <<<<<<<<<<<<<<< + +241 TGGCTGTT + <<<<< + +> Contig41_chr12_25565452_25566993 475 G T 2.230501 + + 1 TTACATAGCcAAGTGGGAAACAAAGcTACATTTTTnAATATTAATAAATCTGTTTTTTTA + >>>>>>>>>>>>>>>>>>>>> + + 61 AAGGGTTaTTATACAATATTATCAAACTTCTTGTGAATGTCAGAATCCAGAACAAACCTA + + +121 AAATCAGTAATACTTGGGAAAGACGCAAATAGTCCCTCTTTCCACT + <<<<<<<<<<<<<<<<<<<< + +> Contig5_chr12_53880670_53882675 1221 A C 0.061001 + + 1 AAGCCATCCATGTGTGTGCTTTCATAATATATTATGACAGGAGATTAAATTCTAAGTAAA + >>>>>>>>>>>>>>>>>>>> + + 61 
GATTAGTCCCCAGTACAGTAGTGTAAAATAGGACTTTTCTCCCTTTTTCTCTCCnCGATA + + +121 TTCCAAATCAGAGTTTGGCCAAAAAgAAGTCAATAAGGACTTAcAAAAAAAAAATCTCCA + + +181 TTCACTGAAAGTAGCTTGCTAGCATTTTCCTTTCTcCTGATGTTGCTCCATAACTTCAAC + + +241 CCTTTTTAAAACTGTCTACTGTGGGGTAGACAGAAGGCGTGGTCGTGAGGTAAAGGTCAA + <<<<<<<<<<<<<<<<<<<< + +> Contig107_chr13_26045881_26046290 341 C G 4.509990 + + 1 CATAGATTGCCTTTTCCAGTCcAGAAGTTTAGAACAGACTGCCCTGAGATCATGGTGGGA + >>>>>>>>>>>>>>>>>>>> + + 61 AATATAATACTCATTAGGTTgTTGAAATTCTTGTAGGAATGGAAGAATTTCAGCTTAGGC + + +121 ATTCTGCTnCTGTATTCCCAGATTACAGTGGGAACTGTATGAAA + <<<<<<<<<<<<<<<<<<<<<<<< + +> Contig251_chr13_28498333_28501066 864 T G 0.067573 + + 1 GCCCTCTGGCTTCTGTTTGGGAGGTAGGGCGGGTGGGCAGGAAGGGAGGACGGTCGGGGT + >>>>>>>>>>>>>>>>>>>> + + 61 ATTGGTTCnCCTCCTCCtGCTGGGTCCCAGATGGATACAGGCCAGGTCTG + <<<<<<<<<<<<<<<<<<<< + +> Contig55_chr13_53467708_53468101 221 T G 5.717222 + HinfI + 1 AATACGGTGAAGAGCAAATGAGAAACATTTCTTCAAACATTTGTAAAGTGAAAATATTTA + >>>>>>>>>>>>>>>>>>>>> + + 61 AAATGAAATAGATnCCAAATTTTTTCTTCCAAtGGATTATCTACTGGGTTCTGAATATCA + <<<<<<<<<<<<<<< + +121 CAAAGACAAATG + <<<<<<<<< + +> Contig48_chr14_11839435_11843272 3014 A G 0.907583 + + 1 GTGCTTCCAGTCAAAGGGGAAAACTTGATAGACAAAAGTTTGGATTTTTTTTTTTTTCCT + >>>>>>>>>>>>>>>>>>>> + + 61 TCTCCTTGGGAGTATGTCTGAGTTACCGTTTTTAGTTTTGATCTGTGGAAAAAGTGATTA + + +121 TATAGGTTCCAAATCTTACTTTTCCCTTTTTGTTTTCAATAGACTTTTTGTGATCATTTC + + +181 AnCATAGTTTGTATTATTAAGTAGGGGTTTTtTTTTGTTTTGGTTTTTTTGTGGTTGTGC + <<<<<<<<<<<<<<< + +241 GTTGTAAG + <<<<< + +> Contig28_chr14_26905747_26909514 975 G C 0.116622 + AluI + 1 CTGGTACGTGCTTCTCCTCCTGCAGCCCACCGTTTACTTGGTAAGTCGCTGCCGATCCGG + >>>>>>>>>>>>>>>>>>>> + + 61 CGCCCCCGCAATCCCACCCTCGTCGCGAGGACAGACAACCAGGGGCGCGCGGGAGGAGGG + + +121 TGAGACCGCCAGTTCAGCGGAGCAGCGTTCCTAGCGACCGTGTTGGAACAACTTTGGCAA + + +181 nCTGGTCTTTGGATCCCTGCGGGATTTTTCGGGTTTCCCACCCTCATTTCTTGCTT + <<<<<<<<<<<<<<<<<<<< + +> Contig64_chr14_56768376_56768902 473 C T 8.281311 + + 1 ATAAGAATCTCCTCAGTAGAGAGAAGCCTGATCTACCATGATTTTATTTGAGTAAAACCA + 
>>>>>>>>>>>>>>>>>>>>>>>>> + + 61 TTGAAACAAACAnTTCAAGAAAGATGGTCAGAGAAGCAAAATGTAA + <<<<<<<<<<<<<<<<<<<<<<< + +> Contig60_chr15_18493036_18494316 150 G A 0.125024 + + 1 CGCCTGGAATAGCATGGTGCCTTTAGGAAATTACATCTAACTCTCTAGGGCTGGAAGGAA + >>>>>>>>>>>>>>>>>>>> + + 61 CACTGAGTnAACGTAAAGAATTGTGGGAGAGAAGCCTTTAGTTAGATCATGCAGGGCtCC + + +121 GTGCTCCAAATGGGCTTTGTGTTTTG + <<<<<<<<<<<<<<<<<<<< + +> Contig112_chr15_26772864_26773267 374 C T + +> Contig119_chr16_6160274_6160477 180 G A + +> Contig60_chr16_28079136_28080263 588 T G 5.998983 + NsiI + 1 TTAGAGAATTATTCACTCCCCCAAAAGTAATAAAAATATAAGAAACAAAGCATAATCATA + >>>>>>>>>>>>>>>>>>>>>> + + 61 ATGCAnTGGTTGAGTTAGTAGTAAATAACATTTTAGGGTCATAAATTAAAAACTGAATTG + + +121 AGATTTAGCTGGAAATTGTGATATAAATGTCAGGATAAGAGAAGCAAGATTGAAAGAAAG + + +181 ATGGATTAAAAATGCTAAATCCTTCTCTACTATTACAGGAAATTGATAAAAGAAGAGAGA + <<<<<<<<<<< + +241 GGAAACAGCACATAT + <<<<<<<<<<<< + +> Contig31_chr17_12128267_12129637 205 G A 0.246305 + + 1 TGGAGGCAATGGAGGTGAATGAGCCCCAGTCCTGGACCTCgAAGCAGACTGGCCAGAGAC + >>>>>>>>>>>>>>>>>>>> + + 61 ACCAGGATTTAAGGCATGTGATGAAGACACAGTTCAAAGTGACGAGCCCTGCAGACTCTT + + +121 CnGGAGCAGAGgTAGAGTGATGACCCGTACCTGGAAGGTTTTAGGAAGGATAACAATGAA + < + +181 TTTACCAGAAGGCAGGGGTAGA + <<<<<<<<<<<<<<<<<<< + +> Contig99_chr17_26021506_26022200 505 C T 0.171977 + RsaI + 1 TGTTGCCATGTTGCCAGTATGTTTTTTTAAGTTTTCCTTTTTAATTTCATTTATGATATT + >>>>>>>>>>>>>>>>>>>> + + 61 TTTTGGAGTAnaGAAGTTATCATTTCACATGATCAACTTTTCAGTCTTTTTCTTTATAAT + + +121 TTTTAAcTTTGTTGTCATGTTTAGAAAGGTTAAATTTATACCTTGTAAAATAcCTTCgCA + + +181 AATTTACATTTGGGAAATTATTAGTAGTATTATTTcAGGAAGTTaTTATTTTTAAGTGTT + <<<<<<<<<<< + +241 GGGTTCCCGTGA + <<<<<<<<< + +> Contig27_chr17_61713766_61716585 1056 G C 2.199527 + Eco47III,HaeII + 1 AAGAGGCGCAGGAAGGAGAGTCCGCCcGCCGCAGCCCGCCCGCCGGCTCCTCAGACAGCn + >>>>>>>>>>>>>>>>>> + + 61 CTCGCaGGTCCTCCAGCCTTCCAGCGAGAAGAAAGAAAGAGCGTCACCGGAAACCACCGA + <<<<<< + +121 AACTCTGGGGTAGAGCG + <<<<<<<<<<<<<< + +> Contig229_chr18_3706523_3708577 1076 A G 0.444778 + + 1 
TTTAAACTCCCGTGTCTGTGCTTGATTATGGCACCGTTAcTCTCGGACGTATTTAATTTT + >>>>>>>>>>>>>>>>>>>> + + 61 CTGATTCTGATTCATTGGTCTATTACATGAGCAATTGGTGGnAAGTGATGTCTGTCTGTG + + +121 GCCCTTACATTATTTATAATAAAACTCTCCTTCAAAGAACCTTTGGACGATGTCTCCACA + + +181 ATTACAGAATGAGTACAAATTAGTTTTCTAAAACAGCAACTGGTGGTTAATTAAGTTTTG + + +241 TCATGTTTTCTGGAGATGAGTGTCTCATGGTTTGGATACTATGAAGGCATTTCTGCAAGG + <<<<<<<<<<<<<<<<<<<< + +301 TT + + +> Contig82_chr18_27305489_27306229 566 C T 0.348750 + + 1 CCTGAGAACTTCAAGCTCAGCGGAGGGCTGAAAGGGAGGTAACCACTTTTGTACTAAATT + >>>>>>>>>>>>>>>>>>>> + + 61 GTCACCTCCTTGCTTATTTTCGTGAAGTTCTAAAGAACACAACTATCTCACTAACACAAc + + +121 AGATTTATTATTGAGTTGTCAGAATCAGCAGCTTTTAGTCACngGTCACTTGTGTGCCTC + <<<<<<<< + +181 CACTCCATCATAACT + <<<<<<<<<<<< + +> Contig64_chr18_55979770_55980315 49 G A 2.123800 + + 1 CCCCAAGGAGACAGGAGGGCAGGCTGTGTGGGTTTCCTGGCCCGCAAnCCCTGTGCAGGT + >>>>>>>>>>>>>>>>>>> + + 61 GCGgTTCTGCCAGGCCCGCAAATCTCGGTCTCACTTAACTGCGGCATCATTTATGCTAAT + <<<<<<<<<<<<<<<<<<<< + +121 G + + +> Contig146_chr19_5221790_5223013 143 A G 0.869806 + + 1 TTAGAATGGCTTTTTCACGGAAGGAGATGAGTTATAAAGTACGGgTGACATTTTTTTGTC + >>>>>>>>>>>>>>>>>>>> + + 61 TTGnGTTTTTTTTTTTTGTCTTGTTTTTAACTGTTGTTTAAGTCAGCCAACAAGTACATA + + +121 ATTTCTCAGCCCACATTTAAAAATTATCAACTCATTTTCACTTGGAGGTGTGGACATAAA + + +181 GCCATAAATATAATTTGCATTCTGCTGACCTGTTTC + <<<<<<<<<<<<<<<<<<<< + +> Contig129_chr19_25541958_25542221 202 T C 2.550968 + HinfI + 1 AAGAATCAAGCATGCATTCTGCCTTCCCCATGTGAAAAGTACCAGGTGAGGATATGTACC + >>>>>>>>>>>>>>>>>>>> + + 61 TCTTTATATCCATGTTCCAAGAACAACAACAACAACAAAAGAATGAGAGTnACCACTTTA + + +121 CAACCCCCAAAGAATTAATGGATT + <<<<<<<<<<<<<<<<<<<<< + +> Contig60_chr19_54013816_54014398 281 A G 1.271267 + + 1 ATTTCTCTCGCCGATATTGAGGTTAAGTATCCCTCTAGGCTAAAAGACCAGCAGCTTTTC + >>>>>>>>>>>>>>>>>>>> + + 61 TTAAACCTATTACAGGAATCCCAATAATGGAAAGAAACGAGGGGAGGCAGTGCTCATGTC + + +121 ACATTCTTCCAGAAATCAAATATAnTGGGTTTTTTTGTTGACGTAAATACATAGGTTGGA + + +181 AAAAAGGTAGGGGGAAAGGAAAA + <<<<<<<<<<<<<<<<<<<< + +> Contig50_chr20_12138509_12141975 3206 C A 0.383804 + 
+ 1 TCACACCAGGCTCAAGGTTAAGGCAGAACACAAGATAAGAGAGCAAGCTGGCTTCCTGTC + >>>>>>>>>>>>>>>>>>>> + + 61 CCCCAGCTGGCtTnCCCATGGGAGCAGAAGCTGGATGGGTGCAGCTGCTGGCTAGGGATC + + +121 CTGTAAAAACTGAAGACCTCCaGTCTCCAGGGCTGGAGGaGGGATTCCTGCCCTGGGGGC + + +181 AGGCCaGATGAGAGGGATGCGATAATGGCAGGTGTCTCCACAAGA + <<<<<<<<<<<<<<<<<<<< + +> Contig36_chr20_32631363_32632049 176 G A 1.149790 + + 1 CTGCCCGAAACAAGTTCCTCATTGTTTCCTnCGTTCTGTGCTGTGGCGGTTTCTTCCTGG + >>>>>>>>>>>>>>>>>>>> + + 61 ACCCAGAGTCCTTTTCcGAACATTAGCAACTCCATTATGCCACACAGAGATGAGATTTGA + + +121 GAAAGGAAAATAAAGTTGTCTCGTGATATGGAGGGCAAAGCTGATAG + <<<<<<<<<<<<<<<<<<<< + +> Contig50_chr21_4178523_4178687 121 G A 0.483377 + + 1 GTAGGAATCTCAAGCCCCAATCTACTTTTCAGGAAGCTGAGGCTCAGAGAAGTAAAGTAA + >>>>>>>>>>>>>>>>>>>> + + 61 CATGCTCAAATTCACACCAGTAAGTGAGAGAGTTnTAAGTAACTATAGTAAGTGACAGAG + <<<< + +121 CTGGGATTTGAACCCTCAT + <<<<<<<<<<<<<<<< + +> Contig129_chr21_31045749_31046924 381 A G 0.028026 + AcyI,Hsp92I + 1 CAGCTGAAGCACCCTCTCTGACCAAACCTGATCTTTCTTTTGGGGATCCTTGACnTCTCA + >>>>>>>>>>>>>>>>>>>> + + 61 TAAGTCTTTATGAACCATTTGTCCTTCCAGCCATCATTTCCTTCAAC + <<<<<<<<<<<<<<<<<<<< + +> Contig159_chr22_7896450_7896974 109 G C 0.465232 + + 1 TAACTGAGTGATAGTGCTTGGcGCAAGACACTAGCAAnCCTGTACTCACCTTCCATTCAT + >>>>>>>>>>>>>>>>>>>> + + 61 TTATGTCATAATAATAATTCTTTAAATATGGAAAGcGTAgAAACAAAATAGGAACACTGC + + +121 TAAGTATTCATTTAGGTAATAAGTTTAGTGCTAGATGTGTGACAGGAATTATTTTcATTA + + +181 ACCACAAGCAAACATTTATGGAATGTCCATTGCATGCTGAAATGTA + <<<<<<<<<<<<<<<<<<<< + +> Contig23_chr22_34612023_34612568 167 C G 0.409430 + + 1 TATTCTACCACTCAAAGCCAGCCTGAAGGAAnCCTGGGcTCTTTCCATCAGCTATCTGAC + >>>>>>>>>>>>>>>>>>>> + + 61 AAGTTGATCTAAAcgTGTAGAAAGCATGCCTGGCTCCACACCTGATTTCATGTGGAGCCA + + +121 TCAGCTCTCACACGATCACCTTG + <<<<<<<<<<<<<<<<<<<< + +> Contig26_chr22_57817664_57819633 1453 A G 0.471213 + RsaI + 1 TGCcCACCCACATCAcTGAACAATTCAGAGAAGATTCCTTTAACATATGCATTCAATGTT + >>>>>>>>>>>>>>>>>>>> + + 61 TAAGCCTCGCTAACATTTTTTAAGCACCGAACCTTTTTAAAAAGGGCTCTAAAAAATAAG + + +121 
CATGAAACTAAATCTCTCTAATACgTCACGTGACACACATGTAtATAACCCAGAAGGTnC + + +181 ATCTAGGGAAACGCAAAAGGAATTATG + <<<<<<<<<<<<<<<<<<<< + +> Contig133_chr23_3525134_3526502 1223 A G 1.358849 + + 1 TCCTATTTTGTCCCCAAGTCCCAGGTTCAGGAGCTCCATTAAGTCACAGGTAATTCAGCT + >>>>>>>>>>>>>>>>>>>> + + 61 GAGAGCCTGCAAAATGGCAACCCCACCTGAGGCACTTTCTTTAAATCAACTGTATCAAGG + + +121 TAACATTTACACAGAATAAnAAGCACTCATTTTAAAGAAATAGCTTGATGAGTTTAGTCT + + +181 AATTGTATCTGTGTAGCCACcACACAGTCAAGATa + <<<<<<<<<<<<<<<<<<<<< + +> Contig35_chr23_28447813_28449115 70 T A 0.163155 + DdeI + 1 CCTTTTCTCTCCATTCACACCCCATCCTTCTTnGTCCCTCCAAAACTCCTAGCTGTTTCC + >>>>>>>>>>>>>>>>>>>> + + 61 CATTTTAGGGTCTCTGCATTTGCTGTTCCAAGCAAGCTCTGCCCCCAAATGATCTGGTGG + <<<<<<<<<<<< + +121 CTTGTTCCCTC + <<<<<<<< + +> Contig50_chr24_22515247_22516072 761 C T 0.190253 + + 1 GGGGAGACCCTGATCCATCCTCATTCTACTGCTTCCCGATGTCCCAGGCCTGCtGTTCTA + >>>>>>>>>>>>>>>>>>>> + + 61 CACGAAAGCCCATTCngTGCCTCCAAGTAGGGAGCAGAAGGGAAGAACACA + <<<<<<<<<<<<<<<<<<<< + +> Contig84_chr24_29196623_29199644 466 C T 0.214603 + + 1 TAATTGGACaCTTTTGACTTGCGTTTCATGATTTTGCCCCATTTTTCTCTGCnGCAATTT + >>>>>>>>>>>>>>>>>>>> + + 61 GGCCAGTGATTCCTGTCTTTCCCTCTATTATCCACTCTGATTAACTCAGCTGCACCTGCC + <<<<<<< + +121 AGCCTTTATTCCTGCA + <<<<<<<<<<<<< + +> Contig144_chr25_4011170_4013134 541 A G 0.086768 + + 1 GGTGAGGtGGAGAGTGGCAAGAGCTGTTGGTGGGCGTGTGTGAGCCAGAGGGCAAGCGGG + >>>>>>>>>>>>>>>>>>>> + + 61 GAGCTCCTAACTGCAAcATCCAGGGGCAGTCGATACTGCCTGGGAAGTAGGAACTGCTCT + + +121 GGAGCATGAGTGGAATTAGCAGATGGATAACAAGGGAGnGCGAcaAGGGCATTTTATGAA + + +181 GATGGAACACCTTGGAAAAGATCAGATTGCTGAAGCATCCGTTTGAGAAAGCACAGATAA + + +241 CTTTTCAAATCTGAAGAGGAGGGACATGACGGGGAGATGAGACTAG + <<<<<<<<<<<<<<<<<<<< + +> Contig103_chr25_38891221_38892140 407 G A 0.166581 + Alw44I,Bsp1286I,CfoI,HhaI + 1 GTGGGATGCAGGTGCTGTGTCTACCCACTTCTTCCGGGGACCAGCCCCTCTCTGGCCACA + >>>>>>>>>>>>>>>>>>>> + + 61 CCCACTTCCTCTCATCTTAACTGTCCAAATTTGCTGACTCAAAGGGATGTGTGTGCGTAT + + +121 GTGTGTGTGTGCnCACATGTGCATGCATGTGTTTTGTGTCTTTCACTCTCAAAATTATTT + + +181 
AAGTTCCCATGGCCCTGCCCTGATTTATCTCCCAAAT + <<<<<<<<<<<<<<<<<<<< + +> Contig204_chr26_4311195_4311778 170 C T 0.085422 + + 1 AACAGAAGCCTGTCCCAGCTACAGGAGGGAAACGGGCTCGGCAgCgTGGCACTGCCTCAC + >>>>>>>>>>>>>>>>>>>> + + 61 tGTCACCCCCAGGAGCCCGGGAAGCCGTCCCTTGAnTCCTCAGTGACGGTGACCATGACC + + +121 AAGGGCAGTAACTCTGCCCGCGGGACACAGCgCTCCTGCTCCCgACgGAAGGTGTGCCGG + + +181 CCACAGAGCGCACGTTGgGGCCgAGTTCAGGGGCAGAtAGGAAGACACAGGa + <<<<<<<<<<<<<<<<<<<< + +> Contig146_chr26_26622638_26623906 574 G A 0.318381 + + 1 TTTCTGAGATCACACAGCCAGGAAATGGGGAGCCAAGATTTGAACCCAAGCCTGTCTGAT + >>>>>>>>>>>>>>>>>>>> + + 61 TCTGGAACCTGCACCAGAaCCACACCTCAgCCCTGCCTTCCCTTGGAAGGCTtACcnTTG + + +121 TGCCTGGAACATAGTAAGTGCTCAAAAAATGGTcTAAATCATCATCGTGTaTTAGGAAGC + + +181 CTGGGTCCACACCCCTTGGGCTGTGGAGTGTCTTGAG + <<<<<<<<<<<<<<<<<<<< + +> Contig135_chr27_6853874_6854079 158 C T 0.060201 + + 1 AAAGGGTTCCAAGTTACGGGATTcATACGGGAAGGCTCCcGAAATAGAAATGATCGTTGT + >>>>>>>>>>>>>>>>>>>> + + 61 AACATGGGGAGATTTGTCAGGGACAgACAnGAACTGTCTTATAAAATGCAGCCCAGTTTT + <<<<<<<<<<<<< + +121 CTTcTTGAGA + <<<<<<< + +> Contig64_chr27_34654435_34654621 132 C A 0.296658 + RsaI + 1 AAATTGGTCAGTGACTGGGAACACGTTCCGAACCAGCTCCGTGGATTTACAAGTTTTCCA + >>>>>>>>>>>>>>>>>>>> + + 61 GTAGAAACGGTCCTCCAAGCGTnCCTGAAGTGCTcATTCATTACCGCAAGGTG + <<<<<<<<<<<<<<<<<<<< + +> Contig131_chr28_6481806_6483783 138 C T 0.387007 + + 1 AGaCCCTCGAAATTCTCCAGTTGTCAAATTCTTCCCCAGTnTCTGCTTGAgAGATTTTCT + >>>>>>>>>>>>>>>>>>>> + + 61 CCTAGCTTCAGAGCCTTAACTACAGAATACTGAGTCTTTGCTCAAGCAGCGGCTCAACAC + + +121 ATAACCCCTAAGCTGCCAAGGCTTTTCTCCCCCAAGACTTTGTTTCCTTCCACGAAACCT + <<<<<<<<<<<<<<<<<<<< + +181 TC + + +> Contig60_chr28_30197166_30197364 92 T C 1.139483 + + 1 AATCAGAAAGTCCCAGAGGTGGAGACTACAGCAAATTACCTGACATTTGTCTTTGATGCT + >>>>>>>>>>>>>>>>>>>> + + 61 nTATGTAAAAACTCTGGGTGGCAGGAAAGCACTTAAATTTACCTTGTAGAGCTTTGCTAC + + +121 CCAATAGAACATTCTGTGCTGATGGGAATG + <<<<<<<<<<<<<<<<<<<< + +> Contig29_chr29_4726399_4727143 559 A T 3.113735 + + 1 CTTTTTGTGGCCAAAAgTGACAACATAATTTTCAAAATGGGAAACGATGATTACAAATGA + 
>>>>>>>>>>>>>>>>>>>> + + 61 GTGGAACACATGTTACAGTGGCAAGATGTGTGAGCAATGCTGATTCAGGGTATAATGGGT + + +121 TGGTTGTAAAACAAATATGAGTTTCTAATATTCGGGCATATTAAACAATCTAAGTTnTAC + + +181 AAAATCTCTCTTGTACTATTTATTGGGTAACTACTAGTAAAGGAAAGGCCTAATAGGCTG + <<<< + +241 TTCCCATAAAAAGAAGCTAC + <<<<<<<<<<<<<<<<< + +> Contig1_chr30_5992217_5993068 106 C T 1.078937 + + 1 CTTGAGACAGCCATGGTGTTTGTTTCTACCTTTCCTCTAAGAAGACACCTGTATACAGAT + >>>>>>>>>>>>>>>>>>>>> + + 61 ATTCCnTGTGACTCACACTCATCCTCATAGACATCCCCAGTATCATTTCTGTGAAGCCTT + <<<<< + +121 CCTTGACATTTTCCAACA + <<<<<<<<<<<<<<< + +> Contig165_chr30_25804389_25804926 190 T C 0.328844 + + 1 CCGCTTGTCCCGCTCTGTGATTTAGATGTTTCACGAGCGGGAAGGTGGGGGGATTGATTC + >>>>>>>>>>>>>>>>>>>> + + 61 TCTnATTCGCGCTTCTCCGCCCAGGCTGCGCATTAGAATCACTTGGGGAGCTTTAAAACA + <<<<<<<<<<<<<<<<< + +121 TGCCAG + <<< + +> Contig38_chr31_5164423_5166573 2074 C T + +> Contig17_chr31_26433828_26434459 498 T C 4.814134 + + 1 CCATGCAATCTCATGCAATGGTTAAAAGCAATGAATTTGTTGTACTCAAAATACCTTGGT + >>>>>>>>>>>>>>>>>>>> + + 61 TGGGTATTAAAAAGTTTTnAGTAAACATAATGAAAAAAATAGAAGTAGATAAGATCAACA + <<<<<<<<<< + +121 CACCTACACGAATTA + <<<<<<<<<<<< + +> Contig9_chr32_19479532_19479735 12 A G + +> Contig30_chr32_25902721_25905783 208 C G 0.322381 + AluI,HindIII + 1 TTCACAGTGTTCTCCCAAGGCACAAATAGAATGCTCAGTCATTGGTTATTTTACTTAGAT + >>>>>>>>>>>>>>>>>>>> + + 61 ATTCCTTTCTAAATATAGACTTACCTGTTATTTTTTCCTATACTAATAACATTCAAATTA + + +121 TCTATGTGTACAATAATAAACACTAGGCATAACTGTATCTCAGTACCAATTTCCTTAGAA + + +181 GGTAAAnCTTATTTCAGTCAAGGTCTAGGCCAAGCATTGA + <<<<<<<<<<<<<<<<<<<< + +> Contig18_chr33_22207246_22209159 1363 G T 2.559961 + + 1 ACGACTGCCCTTTTTCCCTCTGTCTCTATTTCTCCTACACACACACACACACACACACAC + >>>>>>>>>>>>>>>>>>>> + + 61 ACACAgAGTGAGCTTTTAGCAACCTTGTTTAACATTTGGAAAGGAATAGCTGACACAACA + + +121 GAGGGGGGnATAAGTAAATACATTGCATGGCTGTATATAATTGAACATTCTTCAAATTCT + + +181 TTAAACAGAAATTTCAGTACCATGGAGATCCTTGAAAT + <<<<<<<<<<<<<<<<<<<<<< + +> Contig170_chr33_26189421_26189940 292 T C 0.307330 + + 1 
TTCAAGTTCCCCTTTTGTGCCTTCACACACTTGTnTTTATGGTCTCTATTTAAAAAAAAg + >>>>>>>>>>>>>>>>>>>> + + 61 AGACAAAAACCTCTTAATAAATTTAGGAAGTAGTCCTCACTCTTTAAAGGAATTGTGCTT + + +121 AAAGCAGCAGCTCTTCCTCACTCCTTG + <<<<<<<<<<<<<<<<<<<< + +> Contig113_chr34_13341080_13341643 236 C T 0.412222 + Hsp92II,NcoI,StyI + 1 AAATGCTCATTTCCCAACATTCAGTGAAATGCCTGATGACTAATCCTTGCTCCaTGGATC + >>>>>>>>>>>>>>>>>>>> + + 61 CTGGGGGTCCCGTGCACAGAAGAGGGTGAGGTCTCTGCCAnGGACTGTGTCCCTGGAAAT + <<<<<<<<<<<<<< + +121 GACAGGGCA + <<<<<< + +> Contig152_chr34_31794848_31795540 242 G A 2.779642 + + 1 ATAGGAAATAAAACCCCAGCTCTCAGAGnAAAGCAAAATACTTTTAAAAAGATGAAAAAG + >>>>>>>>>>>>>>>>>>>> + + 61 CATCAGAGCTATGAGACACAGAAGATCTAGAGTATAATTGTGTTTTTGTATAGAAGGGAG + + +121 AGAAGGAATGCTGCAGGAGCCACATTTCTCCATCTA + <<<<<<<<<<<<<<<<<<<< + +> Contig47_chr35_3666773_3667898 348 G T 0.234571 + + 1 GCTGCCTCAGCAGTTATCTTGGGTTCTGTTAACTTTGACACACCTTTCACGAAGAAATTC + >>>>>>>>>>>>>>>>>>>> + + 61 TTCATTGCAGTGCTTGAACAATCTGATTGTTCAATCTGATTnGATTCTATTTCTTGCTGA + + +121 GATAATGTTCTAGCACCTTCTCTGTGGATCCCCTTAT + <<<<<<<<<<<<<<<<<<<< + +> Contig74_chr35_25394343_25394813 303 A T 4.297720 + + 1 AGTTTCCCCAAATGTTCATGATTAACCAGGTAAACTGAAGATTAACCTTAAATATATATT + >>>>>>>>>>>>>>>>>>>> + + 61 TCTTTGAGTCATTATAATTAAATTAACTAGGTTGTTTTCAAATATACTAATAATAGAAAC + + +121 TGAAAAAATAATCCAAGTAATATaTCTGAATTGAAAAAAAAAGTAAGGCCATTGTATAAA + + +181 ACAACTGAAAGTTTTTGGAnAAGGTACTATTTTTAATTTACAGTGCATTTTTTTAATCGG + + +241 CATTTCAAATAATAACTTCAATCaCACACACAAAAATAAACCAAATCAACTGCATGTAAG + <<<<<<<<<<<<<<<<<<< + +301 GGaAGT + <<< + +> Contig5_chr36_4562983_4563634 343 C T 1.168507 + + 1 ATATGAATGGTGGTGATGGATTCAGCATCTTGACTCTTTTTCAACTATGTCAAGATTTGC + >>>>>>>>>>>>>>>>>>>> + + 61 ACTGGATCTTGTCTAAAGTCACTCTTCTAGGGGAAGTCAAAGAGACTGGGTCaGTCCtCA + + +121 AGATAcGATGTAAGCAGGTAAGATAGCACTATAGTAGGTCTTCTTGTCATGGTGAGTCAA + + +181 TAACCATTCAATATTCTTTCnACCTACTCTTTACCTGCTCAATCAAGGTAGGGGTC + <<<<<<<<<<<<<<<<<<<< + +> Contig133_chr36_32954045_32955409 136 A G 3.772017 + TaqI + 1 
ATTAAATGAAAACAGTGTCAGGCAATAAGATGTATTAAGTACAGTATGCCTGAGGATATA + >>>>>>>>>>>>>>>>>>>>>> + + 61 ATATTAAACACAGATTCTGCTGTTACTATCnAAGTGGATATTAAAATAACAGTGCTACTT + + +121 TGAGGGTAATGCTACTTTGGAGAATATTTTCTAATAAGCTCACCaTAAAATGACggATAA + <<<<<<<<<<<<<<<<<<<< + +> Contig53_chr37_6665763_6665919 116 C T 10.874746 + BstOI + 1 AGTCCTCATGTTGTACTTTACCTCACCTGAATTTACTCATCtGATAGTTGGAAATTTGTA + >>>>>>>>>>>>>>>>>>>>>>>> + + 61 TCCATTGCCCATCtTCACCACCCCATGTCnCTGGAAACCAACAAtCTGTTCTCTGTATGa + <<<<<<<<<<<<<<<<<<<<<<<<< + +121 CTT + + +> Contig2_chr37_31197993_31198256 182 C T 0.594606 + + 1 CTCTCACCACATGGAGAATCCTGTATGTTCAGCTGTATGACGTGGGGGGAACGTCAGAGC + >>>>>>>>>>>>>>>>>>>> + + 61 TCAGTTTCATAGCAGTCAGCTCCATGTTATGGGTTCAAgAnGAAAACAGGTGGCAGGCtT + + +121 GCCACAGCCTCCCTCAGGGGTGgCCTTGACAGATAAAcGT + <<<<<<<<<<<<<<<<<<<< + +> Contig7_chr38_12217200_12218387 1163 A T + +> Contig265_chrX_2689247_2689484 114 C G 9.232233 + + 1 CTTAGAGAATTCCCTGATTCACTGAGTTAAATTATTACCAAATCTGATAATAATAAAAGA + >>>>>>>>>>>>>>>>>>>>>>> + + 61 AGTAATTACAGATCAATAATTAATCTATATGTCTGAATACATTTTAATAAGTCCnAcTCA + + +121 ACAATATGCTGACAAAACAATACATCTTGTCT + <<<<<<<<<<<<<<<<<<<<<<< + +> Contig113_chrX_26287829_26288398 385 C T 0.077485 + + 1 AAAGCCGTAACAGTCGCTAGGAGAATCATAATTTTAAGCTTTGTGTGTCCCGGGcTTGAG + >>>>>>>>>>>>>>>>>>>> + + 61 TCCCTCAGGAGTAGTTAGATGCGGCCTTAAATTCTCcCAGTAAATTCACnTTGACGGCCT + + +121 ATTTTTGACCTGGGGGCACACGCTGCTATACACTCTAGCCACCTCTGATCCTCTGGCCTC + + +181 CTCTGTTACAATGACAGAAACGACAGAAGCATTTCTTTAAAATAAGTCCCAGTACGTGCA + + +241 CACAAACGTTCAGGGCAGCCTTCTCCATAAACGGCACGAAATGGC + <<<<<<<<<<<<<<<<<<<< + +> Contig90_chrX_57430715_57431566 548 C T 0.153995 + EcoRV + 1 CTCATTCCCAGCTACCTCCACCTCTATACCAACCCCTAGTTCCTGTACATCCCTGCTTCT + >>>>>>>>>>>>>>>>>>>> + + 61 ATAGGAAATCTTCCTGGTGTTGATATnATTCCCAAGGTCAGGCTGTCCTCCTAGCTCCCT + + +121 CTCCTCATCTGCATCAAGTCCTCCAAACTGGGCAGTAGAC + <<<<<<<<<<<<<<<<<<<< + +> Contig133_chrX_84833782_84834125 182 G A 0.277794 + + 1 CACCAGAGTGCAATCGAGAACCATCTGATCACAGAACCATAGAAAAGATTGCTGTACAAG + >>>>>>>>>>>>>>>>>>>> + + 61 
ACTTAGGAACTCATTCTGTTCAGGATGGAGAAGCTGATGCCCAAAAAGGGAAAGGAACTT + + +121 AACCAAAGTCCATACAnTATCAACTCTACACATAAAGGAAGGGAGTGGAGGGAGCAGTAA + + +181 GACCAGAGATATAGACCCCAGTGAGGAGGCTGTGAGCTCCTG + <<<<<<<<<<<<<<<<<<<< +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/find_intervals/find_intervals.interval Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,1 @@ +chr2 9817960 67331624 1272.2000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,150 @@ +ENSCAFT00000000001 476153 cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways +ENSCAFT00000000144 483960 N +ENSCAFT00000000160 610160 N +ENSCAFT00000000215 U N +ENSCAFT00000000233 483973 N +ENSCAFT00000000365 474414 cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis +ENSCAFT00000000507 484023 N +ENSCAFT00000000517 476233 N +ENSCAFT00000000674 611986 N +ENSCAFT00000000724 609478 N +ENSCAFT00000000760 U N +ENSCAFT00000000762 U N +ENSCAFT00000001047 475067 cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways +ENSCAFT00000001052 U N +ENSCAFT00000001063 481999 N +ENSCAFT00000001076 U N +ENSCAFT00000001104 607591 N +ENSCAFT00000001141 484064 N +ENSCAFT00000001146 475076 N +ENSCAFT00000001204 481203 N +ENSCAFT00000001219 474465 N +ENSCAFT00000001250 481729.481731 cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis +ENSCAFT00000001352 482026 cfa00565=Ether lipid metabolism +ENSCAFT00000001363 475084 cfa03022=Basal transcription factors +ENSCAFT00000001421 484096 N 
+ENSCAFT00000001523 475088 N +ENSCAFT00000001575 481744 cfa04141=Protein processing in endoplasmic reticulum +ENSCAFT00000001587 482035 N +ENSCAFT00000001597 609411 N +ENSCAFT00000002056 610014 N +ENSCAFT00000002100 U N +ENSCAFT00000002110 481249 N +ENSCAFT00000002175 476310 N +ENSCAFT00000002259 484151 N +ENSCAFT00000002460 481785 N +ENSCAFT00000002537 U N +ENSCAFT00000002577 484157 N +ENSCAFT00000002578 608906 N +ENSCAFT00000002660 U N +ENSCAFT00000002792 474523 N +ENSCAFT00000002849 475216 N +ENSCAFT00000002999 U N +ENSCAFT00000003163 474921 cfa03040=Spliceosome +ENSCAFT00000003223 474925 N +ENSCAFT00000003307 609995 N +ENSCAFT00000003515 482316 N +ENSCAFT00000003560 U N +ENSCAFT00000003644 484216 cfa00970=Aminoacyl-tRNA biosynthesis +ENSCAFT00000003824 475249 N +ENSCAFT00000003840 482333 N +ENSCAFT00000004092 474960 N +ENSCAFT00000004103 484298 N +ENSCAFT00000004208 481637 N +ENSCAFT00000004253 100534006.100534007.474588 N +ENSCAFT00000004311 482346 N +ENSCAFT00000004464 481892 N +ENSCAFT00000004511 481893 N +ENSCAFT00000004609 611755 N +ENSCAFT00000004673 611817 N +ENSCAFT00000004726 610047 cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection +ENSCAFT00000004799 U N +ENSCAFT00000004933 482382 cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis +ENSCAFT00000004993 474995 cfa03008=Ribosome biogenesis in eukaryotes +ENSCAFT00000005126 U N +ENSCAFT00000005142 606804 N +ENSCAFT00000005225 475647 N +ENSCAFT00000005323 U N +ENSCAFT00000005467 U N +ENSCAFT00000005496 481925 N +ENSCAFT00000005518 492302 cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion +ENSCAFT00000005653 403417 cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella 
infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis +ENSCAFT00000005746 476410 cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway +ENSCAFT00000005749 610007 N +ENSCAFT00000005832 403584 cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis +ENSCAFT00000005972 475012 N +ENSCAFT00000006025 482980 N +ENSCAFT00000006114 483829 N +ENSCAFT00000006157 475021 N +ENSCAFT00000006219 483261 cfa04972=Pancreatic secretion.cfa04978=Mineral absorption +ENSCAFT00000006272 484394 cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways +ENSCAFT00000006453 475893 N +ENSCAFT00000006479 U N +ENSCAFT00000006507 484622 cfa03030=DNA replication.cfa04110=Cell cycle +ENSCAFT00000006669 476094 N +ENSCAFT00000006689 475897 N +ENSCAFT00000006827 U N +ENSCAFT00000006891 610021 N +ENSCAFT00000007130 485445 cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction +ENSCAFT00000007145 607961 N +ENSCAFT00000007244 476781 N +ENSCAFT00000007375 403767 cfa04977=Vitamin digestion and absorption +ENSCAFT00000007440 482516 N +ENSCAFT00000007467 485576 N +ENSCAFT00000007484 609336 N +ENSCAFT00000007527 607108 N +ENSCAFT00000007553 487123 cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency +ENSCAFT00000007697 475382 N +ENSCAFT00000007703 477019 cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer +ENSCAFT00000007747 U 
N +ENSCAFT00000007774 477021 cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy +ENSCAFT00000007776 U N +ENSCAFT00000007779 478007.478008 cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome +ENSCAFT00000007859 483010 N +ENSCAFT00000007951 U N +ENSCAFT00000007959 482810.611087 N +ENSCAFT00000008012 485173 N +ENSCAFT00000008063 484489 N +ENSCAFT00000008142 476128 N +ENSCAFT00000008198 612489 N +ENSCAFT00000008413 U N +ENSCAFT00000008540 483021 N +ENSCAFT00000008586 484499 N +ENSCAFT00000008588 U N +ENSCAFT00000008673 478018 N +ENSCAFT00000008678 485188 N +ENSCAFT00000008728 U N +ENSCAFT00000008769 485523 cfa02010=ABC transporters.cfa04976=Bile secretion +ENSCAFT00000008831 475398 N +ENSCAFT00000009074 485769 cfa04330=Notch signaling pathway +ENSCAFT00000009114 483354 N +ENSCAFT00000009614 475416 N +ENSCAFT00000009698 486001 N +ENSCAFT00000009710 486002 N +ENSCAFT00000010094 486223 cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection +ENSCAFT00000010141 482857 cfa04360=Axon guidance +ENSCAFT00000010439 610992 N +ENSCAFT00000010496 415126 cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma +ENSCAFT00000010516 U N +ENSCAFT00000010531 484693 N +ENSCAFT00000010559 483405 N +ENSCAFT00000010593 U N +ENSCAFT00000010616 474176 cfa03450=Non-homologous end-joining.cfa04110=Cell cycle +ENSCAFT00000010630 486770 N +ENSCAFT00000010829 486944 N +ENSCAFT00000010865 U N +ENSCAFT00000010931 485368 N +ENSCAFT00000010977 U N +ENSCAFT00000010988 482891 cfa04145=Phagosome +ENSCAFT00000011187 475441 N +ENSCAFT00000011380 U 
N +ENSCAFT00000011397 475750 cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection +ENSCAFT00000011721 475621 N +ENSCAFT00000011730 486534 N +ENSCAFT00000011771 477193 N +ENSCAFT00000011789 609978 N +ENSCAFT00000011968 488881 cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome +ENSCAFT00000012081 478082 cfa04621=NOD-like receptor signaling pathway +ENSCAFT00000012133 611998 N +ENSCAFT00000012159 484609 N +ENSCAFT00000012254 U N
--- a/test-data/test_out/modify_snp_table/modify.gd_snp Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,338 +0,0 @@ -Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 -Contig20_chr1_21313469_21313570 66 C T 54.0 chr1 21313534 C 4 0 2 39 4 0 2 39 5 0 2 42 4 0 2 39 4 0 2 39 5 0 2 42 N 1 +99. 0 -Contig86_chr1_30984450_30985684 670 C T 365.0 chr1 30985133 C 9 0 2 54 10 0 2 57 13 0 2 66 3 0 2 36 9 0 2 54 7 0 2 48 Y 145 0.031 0 -Contig5_chr1_32562160_32563940 1215 G T 163.0 chr1 32563356 G 17 0 2 78 19 0 2 84 20 0 2 87 14 0 2 69 12 0 2 63 10 0 2 57 Y 17 0.251 0 -Contig110_chr1_33385093_33386888 510 C T 270.0 chr1 33385587 A 14 0 2 69 11 0 2 60 19 0 2 84 11 0 2 60 10 0 2 57 13 0 2 66 Y 13 0.126 0 -Contig100_chr1_33562920_33564288 743 C T 178.0 chr1 33563655 C 6 0 2 45 10 0 2 57 8 0 2 51 5 0 2 42 13 0 2 66 7 0 2 48 Y 13 0.090 3 -Contig7_chr1_37302355_37302489 97 A G 59.2 chr1 37302452 G 3 0 2 36 8 0 2 51 5 0 2 42 8 0 2 51 7 0 2 48 6 0 2 45 N 56 2.812 0 -Contig62_chr1_41880715_41882180 1078 T G 57.6 chr1 41881785 T 14 0 2 69 15 0 2 72 16 0 2 75 13 0 2 66 8 0 2 51 10 0 2 57 Y 21 0.477 0 -Contig47_chr1_48409178_48409384 37 C T 134.0 chr1 48409215 T 5 0 2 42 6 0 2 45 8 0 2 51 9 0 2 54 4 0 2 39 6 0 2 45 N 66 +99. 
0 -Contig119_chr1_49647683_49650077 1618 C A 99.7 chr1 49649276 A 8 0 2 51 11 0 2 60 10 0 2 57 9 0 2 54 10 0 2 57 14 0 2 69 Y 16 0.166 0 -Contig21_chr1_60697952_60699446 307 G A 51.9 chr1 60698265 G 12 0 2 63 9 0 2 54 4 0 2 39 6 0 2 45 9 0 2 54 4 0 2 39 Y 98 0.507 0 -Contig131_chr1_62319542_62320564 169 C G 103.0 chr1 62319709 C 12 0 2 63 12 0 2 66 14 0 2 69 12 0 2 63 9 0 2 54 9 0 2 54 Y 73 0.307 1 -Contig14_chr1_63450425_63450680 101 T A 102.0 chr1 63450530 T 8 0 2 51 10 0 2 57 18 0 2 81 8 0 2 51 8 0 2 34 8 0 2 51 N 99 1.085 0 -Contig83_chr1_63869778_63869942 40 T C 23.7 chr1 63869819 C 5 0 2 42 7 0 2 48 2 0 2 33 4 0 2 39 6 0 2 48 4 0 2 39 N 654 1.364 0 -Contig30_chr1_64702572_64703138 178 A T 117.0 chr1 64702750 T 10 0 2 57 10 0 2 57 20 0 2 87 21 0 2 90 6 0 2 45 12 0 2 63 Y 50 3.872 0 -Contig101_chr1_69868406_69868872 287 G A 14.6 chr1 69868689 G 13 0 2 66 17 0 2 78 10 0 2 57 8 0 2 51 7 0 2 48 8 0 2 51 N 137 0.305 0 -Contig35_chr1_74482577_74482791 170 G A 45.4 chr1 74482751 A 3 0 2 36 4 0 2 39 13 0 2 66 2 0 2 33 5 0 2 42 2 0 2 33 N 20 +99. 3 -Contig49_chr1_83865731_83865944 85 G A 34.1 chr1 -1 N 4 0 2 39 4 0 2 39 8 0 2 51 2 0 2 33 5 0 2 42 4 0 2 39 N -1 1.485 0 -Contig129_chr1_117547123_117548666 926 G A 126.0 chr1 117548059 G 19 0 2 84 9 0 2 54 11 0 2 60 10 0 2 57 12 0 2 63 11 0 2 60 Y 64 0.049 0 -Contig7_chr1_125154638_125154844 190 G T 130.0 chr1 125154818 A 5 0 2 42 4 0 2 39 7 0 2 48 2 0 2 33 7 0 2 48 4 0 2 39 N 33 +99. 
0 -Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 -Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 -Contig10_chr2_40859744_40860534 637 G A 888.0 chr2 40860397 A 3 0 2 36 3 0 2 36 2 0 2 33 7 0 2 48 6 0 2 45 8 0 2 51 Y 42 1.435 0 -Contig52_chr2_41421981_41422725 604 C A 888.0 chr2 41422583 A 17 0 2 78 18 0 2 81 14 0 2 69 17 0 2 78 12 0 2 63 14 0 2 69 Y 44 0.882 0 -Contig94_chr2_43869105_43870358 220 G A 888.0 chr2 43869333 G 12 0 2 63 18 0 2 81 11 0 2 60 15 0 2 72 12 0 2 63 13 0 2 66 Y 1 0.156 0 -Contig34_chr2_48444129_48444939 695 C T 134.0 chr2 48444828 C 14 0 2 69 8 0 2 51 16 0 2 75 17 0 2 78 9 0 2 54 15 0 2 72 Y 161 0.375 0 -Contig6_chr2_56859179_56859956 671 T C 999.9 chr2 56859851 T 15 0 2 72 18 0 2 81 20 0 2 90 19 0 2 84 19 0 2 84 24 0 2 99 N 28 5.308 1 -Contig115_chr2_61631913_61632510 310 G T 999.3 chr2 61632216 G 7 0 2 48 9 0 2 54 7 0 2 48 11 0 2 60 10 0 2 57 10 0 2 57 N 13 0.184 0 -Contig31_chr2_67331584_67331785 39 C T 999.0 chr2 67331623 C 11 0 2 60 10 0 2 57 7 0 2 48 9 0 2 54 2 0 2 33 4 0 2 39 N 110 0.647 1 -Contig92_chr2_75906683_75907774 773 T C 85.4 chr2 75907438 C 12 0 2 63 12 0 2 63 17 0 2 78 8 0 2 51 8 0 2 51 13 0 2 66 Y 93 0.166 0 -Contig163_chr2_76402959_76404830 221 C T 127.0 chr2 76403181 C 4 0 2 42 10 0 2 57 9 0 2 54 11 0 2 60 7 0 2 48 9 0 2 54 Y 54 0.178 1 -Contig59_chr2_85243022_85243758 506 G A 96.3 chr2 85243509 T 9 0 2 54 11 0 2 60 12 0 2 63 14 0 2 69 10 0 2 57 7 0 2 48 Y 6 0.459 0 -Contig56_chr3_17326225_17327548 387 G C 91.2 chr3 17326591 G 14 0 2 69 13 0 2 66 15 0 2 72 15 0 2 72 13 0 2 66 12 0 2 63 Y 20 0.225 3 -Contig108_chr3_46210055_46210874 367 A G 21.0 chr3 46210423 A 19 0 2 84 10 0 2 57 16 0 2 75 14 0 2 69 20 0 2 87 11 0 2 60 N 236 0.028 1 -Contig16_chr3_47113407_47114449 322 G A 105.0 chr3 47113713 G 13 0 2 66 17 0 2 78 15 0 2 72 6 0 2 45 11 0 2 60 11 0 2 60 
Y 114 0.132 5 -Contig3_chr3_47564810_47565251 262 T G 112.0 chr3 47565104 T 14 0 2 69 16 0 2 75 20 0 2 87 10 0 2 57 9 0 2 54 8 0 2 51 Y 24 0.073 1 -Contig35_chr3_49662401_49662929 270 A T 96.1 chr3 49662652 A 14 0 2 69 11 0 2 60 23 0 2 96 13 0 2 66 12 0 2 63 11 0 2 60 Y 36 3.583 2 -Contig97_chr3_49820354_49821631 1069 G A 44.1 chr3 49821402 G 9 0 2 54 9 0 2 54 6 0 2 45 10 0 2 57 5 0 2 42 8 0 2 51 N 6 0.201 2 -Contig25_chr3_53260697_53262560 402 G A 211.0 chr3 53261095 G 17 0 2 78 14 0 2 69 15 0 2 75 12 0 2 63 14 0 2 69 12 0 2 63 Y 116 1.033 0 -Contig11_chr3_53992739_53995954 2392 G A 82.4 chr3 53995143 A 12 0 2 66 11 0 2 60 14 0 2 69 6 0 2 45 11 0 2 60 17 0 2 78 Y 358 0.321 1 -Contig236_chr3_72676275_72676473 128 G A 278.0 chr3 72676410 G 12 0 2 63 11 0 2 60 13 0 2 66 10 0 2 57 11 0 2 60 8 0 2 51 N 36 0.496 1 -Contig48_chr3_74792236_74792388 63 T C 111.0 chr3 74792289 - 17 0 2 78 9 0 2 54 9 0 2 54 5 0 2 42 11 0 2 60 9 0 2 54 N -1 3.528 0 -Contig65_chr3_80727952_80728283 39 T C 71.2 chr3 80727990 T 7 0 2 48 3 0 2 36 8 0 2 51 6 0 2 45 8 0 2 51 11 0 2 60 N 22 7.078 0 -Contig53_chr3_86407941_86409349 1406 G A 86.9 chr3 86409317 A 5 0 2 42 5 0 2 42 4 0 2 39 10 0 2 57 8 0 2 51 12 0 2 63 N 14 3.285 1 -Contig13_chr3_92409738_92412300 718 A G 23.3 chr3 92410450 A 12 0 2 63 16 0 2 75 18 0 2 81 13 0 2 66 22 0 2 93 7 0 2 48 Y 23 0.224 2 -Contig134_chr4_12145648_12148225 1326 C T 164.0 chr4 12146961 C 9 0 2 54 8 0 2 51 7 0 2 48 3 0 2 36 5 0 2 42 5 0 2 42 Y 4 0.080 1 -Contig88_chr4_15557471_15557833 268 A G 145.0 chr4 15557737 A 6 0 2 45 6 0 2 45 11 0 2 60 9 0 2 54 5 0 2 42 6 0 2 45 Y 46 4.138 0 -Contig53_chr4_18823968_18824478 149 A G 91.3 chr4 18824115 A 18 0 2 81 15 0 2 72 21 0 2 90 13 0 2 66 9 0 2 54 12 0 2 63 N 51 0.251 0 -Contig19_chr4_26233601_26233991 146 G C 51.6 chr4 26233744 G 10 0 2 57 8 0 2 51 9 0 2 54 5 0 2 42 9 0 2 54 4 0 2 39 N 41 0.163 3 -Contig78_chr4_28579975_28580134 30 T G 19.6 chr4 28579994 - 4 0 2 39 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 2 0 2 33 N 33 0.499 
0 -Contig16_chr4_30177226_30179725 621 C T 88.4 chr4 30177859 C 20 0 2 87 13 0 2 66 13 0 2 66 11 0 2 60 8 0 2 51 8 0 2 51 Y 45 0.797 1 -Contig30_chr4_46196500_46197672 1045 A C 33.4 chr4 46197522 C 16 0 2 75 9 0 2 54 4 0 2 39 7 0 2 48 14 0 2 69 6 0 2 45 Y 43 0.306 0 -Contig2_chr4_47039007_47039323 158 G C 35.1 chr4 47039160 - 8 0 2 51 9 0 2 54 13 0 2 66 8 0 2 51 10 0 2 60 9 0 2 54 N 0 0.131 0 -Contig17_chr4_61310346_61311158 267 C T 49.9 chr4 61310604 T 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 14 0 2 69 7 0 2 48 Y 219 0.098 0 -Contig26_chr4_64190783_64191295 64 A G 162.0 chr4 64190843 A 10 0 2 57 6 0 2 45 20 0 2 87 12 0 2 63 17 0 2 78 7 0 2 48 Y 306 7.428 0 -Contig11_chr4_65500960_65501654 634 T C 107.0 chr4 65501585 T 13 0 2 66 14 0 2 69 13 0 2 66 13 0 2 66 6 0 2 45 18 0 2 81 Y 10 6.849 0 -Contig38_chr4_67768488_67768982 113 A G 102.0 chr4 67768598 A 9 0 2 54 8 0 2 51 9 0 2 54 11 0 2 60 10 0 2 57 7 0 2 48 Y 188 3.175 0 -Contig30_chr4_70978564_70979580 596 A G 164.0 chr4 70979151 A 15 0 2 72 12 0 2 63 20 0 2 87 14 0 2 69 15 0 2 72 15 0 2 72 Y 111 2.458 2 -Contig72_chr4_74225793_74226492 674 A G 110.0 chr4 74226472 A 5 0 2 42 3 0 2 36 2 0 2 33 3 0 2 36 7 0 2 48 4 0 2 39 Y 115 +99. 
1 -Contig32_chr4_75618955_75620254 301 T C 333.0 chr4 75619257 C 10 0 2 57 8 0 2 51 12 0 2 63 20 0 2 87 12 0 2 63 14 0 2 69 Y 34 0.163 2 -Contig31_chr5_4734956_4736547 1166 C T 133.0 chr5 4736132 C 14 0 2 69 8 0 2 51 17 0 2 78 4 0 2 39 9 0 2 54 12 0 2 63 Y 1 0.021 0 -Contig30_chr5_15698241_15699076 396 G T 76.6 chr5 15698633 T 8 0 2 51 9 0 2 54 10 0 2 57 7 0 2 48 11 0 2 60 8 0 2 54 Y 65 0.009 0 -Contig36_chr5_17709244_17710004 373 T C 281.0 chr5 17709624 T 6 0 2 45 9 0 2 54 7 0 2 48 4 0 2 39 10 0 2 57 4 0 2 39 Y 16 0.131 0 -Contig13_chr5_21881138_21881562 227 A G 251.0 chr5 21881356 A 11 0 2 60 20 0 2 87 22 0 2 93 10 0 2 57 10 0 2 57 21 0 2 90 Y 182 2.013 0 -Contig5_chr5_23188121_23190168 1841 C T 141.0 chr5 23189975 C 20 0 2 87 19 0 2 84 22 0 2 93 16 0 2 75 18 0 2 81 14 0 2 69 N 45 0.355 0 -Contig6_chr5_26899813_26900498 97 A C 88.6 chr5 26899910 A 15 0 2 72 14 0 2 69 27 0 2 108 15 0 2 72 13 0 2 69 12 0 2 63 Y 92 7.370 3 -Contig314_chr5_34019166_34019319 72 C A 20.1 chr5 -1 N 6 0 2 45 9 0 2 54 4 0 2 39 4 0 2 39 9 0 2 54 5 0 2 42 N -1 +99. 4 -Contig147_chr5_38980258_38980559 221 C T 40.8 chr5 38980477 C 15 0 2 72 15 0 2 72 19 0 2 84 10 0 2 57 12 0 2 63 20 0 2 87 Y 11 4.576 0 -Contig115_chr5_48119079_48120169 151 C T 78.3 chr5 48119234 C 17 0 2 78 10 0 2 57 14 0 2 69 16 0 2 75 8 0 2 51 12 0 2 63 Y 205 0.320 0 -Contig45_chr5_50892738_50892968 169 C A 25.8 chr5 50892911 C 10 0 2 57 7 0 2 48 10 0 2 60 6 0 2 45 6 0 2 45 13 0 2 66 N 244 0.497 1 -Contig40_chr5_51484164_51484696 14 A G 53.3 chr5 51484180 A 6 0 2 45 4 0 2 39 4 0 2 39 3 0 2 36 0 0 2 13 3 0 2 36 N 63 +99. 
1 -Contig40_chr5_51664286_51667573 861 C T 148.0 chr5 51665149 C 20 0 2 87 21 0 2 90 20 0 2 87 11 0 2 60 16 0 2 75 15 0 2 72 Y 207 0.080 1 -Contig15_chr5_51889708_51891244 882 A G 149.0 chr5 51890581 G 13 0 2 66 18 0 2 81 17 0 2 78 22 0 2 93 15 0 2 72 22 0 2 93 Y 7 0.025 1 -Contig143_chr5_57231364_57232010 294 T C 78.5 chr5 57231644 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 10 0 2 57 6 0 2 45 Y 73 0.337 2 -Contig13_chr5_57609985_57610584 496 C T 50.5 chr5 57610476 C 17 0 2 78 9 0 2 54 6 0 2 45 8 0 2 51 10 0 2 57 12 0 2 63 N 77 2.022 1 -Contig230_chr5_58486998_58487280 227 T C 192.0 chr5 58487232 T 3 0 2 36 4 0 2 39 9 0 2 54 6 0 2 45 4 0 2 39 7 0 2 48 N 24 0.100 2 -Contig32_chr5_70852360_70853289 282 G A 114.0 chr5 70852623 G 16 0 2 75 11 0 2 60 13 0 2 66 12 0 2 63 13 0 2 66 7 0 2 48 Y 33 0.276 0 -Contig100_chr5_71189678_71190590 813 C T 30.8 chr5 71190523 C 11 0 2 60 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 Y 8 0.362 1 -Contig45_chr5_76133561_76134403 388 A G 103.0 chr5 76133941 G 3 0 2 36 8 0 2 51 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 57 0.038 0 -Contig61_chr5_90202541_90204393 909 C T 101.0 chr5 90203461 T 7 0 2 48 5 0 2 42 14 0 2 69 3 0 2 36 5 0 2 42 8 0 2 51 Y 64 1.448 0 -Contig111_chr6_5821219_5822519 1060 A G 68.1 chr6 5822321 T 7 0 2 48 6 0 2 45 11 0 2 60 9 0 2 54 3 0 2 36 12 0 2 63 Y 7 0.231 1 -Contig220_chr6_10671338_10672441 999 T C 36.3 chr6 10672322 T 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 5 0 2 42 9 0 2 54 Y 1 1.667 0 -Contig226_chr6_17361986_17362884 418 G C 251.0 chr6 17362406 G 6 0 2 45 8 0 2 51 7 0 2 48 9 0 2 54 7 0 2 48 7 0 2 48 Y 7 0.147 0 -Contig51_chr6_20231207_20231785 161 A G 70.5 chr6 20231375 G 13 0 2 66 5 0 2 42 8 0 2 51 2 0 2 36 5 0 2 42 5 0 2 42 Y 153 1.754 0 -Contig102_chr6_30271329_30271577 39 T G 139.0 chr6 30271371 G 3 0 2 36 4 0 2 39 6 0 2 45 1 0 2 30 4 0 2 39 4 0 2 39 N 15 1.159 0 -Contig217_chr6_31393824_31394218 97 G A 115.0 chr6 31393921 G 9 0 2 54 19 0 2 84 15 0 2 72 12 0 2 63 7 0 2 48 10 0 2 57 N 45 0.477 0 
-Contig186_chr6_31928098_31928245 73 G A 117.0 chr6 -1 N 5 0 2 42 8 0 2 51 2 0 2 33 4 0 2 39 1 0 2 30 5 0 2 42 N -1 0.276 1 -Contig52_chr6_33188498_33188724 123 G A 59.0 chr6 -1 N 5 0 2 42 13 0 2 66 8 0 2 51 4 0 2 39 9 0 2 54 9 0 2 54 N -1 0.880 1 -Contig102_chr6_38743009_38743435 290 A G 178.0 chr6 38743311 A 11 0 2 60 13 0 2 66 9 0 2 54 11 0 2 60 12 0 2 63 13 0 2 66 Y 34 0.148 4 -Contig81_chr6_49018353_49019532 179 C A 72.5 chr6 49018530 A 15 0 2 72 13 0 2 66 19 0 2 72 8 0 2 51 12 0 2 63 16 0 2 75 Y 15 0.145 1 -Contig112_chr6_51024554_51024851 100 A G 121.0 chr6 51024654 A 10 0 2 57 12 0 2 63 9 0 2 54 13 0 2 66 14 0 2 69 17 0 2 78 N 75 4.287 0 -Contig40_chr6_51412751_51413807 227 T C 94.5 chr6 51412975 C 5 0 2 42 8 0 2 51 7 0 2 48 9 0 2 54 11 0 2 60 10 0 2 57 Y 4 5.661 0 -Contig47_chr6_69073222_69074767 1315 T C 212.0 chr6 69074558 T 20 0 2 87 17 0 2 78 18 0 2 81 12 0 2 63 17 0 2 78 7 0 2 48 Y 9 0.652 0 -Contig30_chr6_74848932_74849059 57 C G 46.3 chr6 74848993 C 7 0 2 48 7 0 2 33 6 0 2 45 7 0 2 48 5 0 2 42 6 0 2 45 N -1 +99. 1 -Contig84_chr7_6648683_6650255 1297 G A 110.0 chr7 6649988 G 18 0 2 81 9 0 2 54 22 0 2 77 16 0 2 75 20 0 2 87 6 0 2 45 Y 83 0.166 0 -Contig239_chr7_13007379_13007700 275 A G 39.8 chr7 13007642 A 8 0 2 51 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 5 0 2 42 N 46 1.511 3 -Contig119_chr7_18310707_18310948 23 A T 133.0 chr7 18310729 A 6 0 2 45 5 0 2 42 10 0 2 57 5 0 2 42 2 0 2 33 2 0 2 33 N 4553 +99. 0 -Contig93_chr7_18513377_18513741 173 T C 130.0 chr7 18513533 C 15 0 2 72 11 0 2 60 18 0 2 81 6 0 2 45 10 0 2 57 14 0 2 69 Y 115 0.174 0 -Contig133_chr7_19603333_19603776 414 C G 31.9 chr7 19603734 G 10 0 2 57 4 0 2 39 4 0 2 39 5 0 2 42 9 0 2 54 9 0 2 54 N 78 +99. 
5 -Contig132_chr7_20426224_20428145 1815 A G 28.3 chr7 20428041 A 11 1 2 43 12 0 2 63 19 0 2 84 23 0 2 96 14 0 2 69 10 0 2 57 N 11 0.264 0 -Contig206_chr7_26281823_26282074 103 C A 101.0 chr7 26281925 T 11 0 2 60 16 0 2 61 19 0 2 84 6 0 2 45 19 0 2 84 16 0 2 75 N -1 0.947 1 -Contig55_chr7_53147505_53148974 894 A G 68.4 chr7 53148397 G 22 0 2 93 13 0 2 66 16 0 2 75 8 0 2 51 16 0 2 75 11 0 2 60 Y 19 0.060 0 -Contig4_chr7_53685534_53688206 1709 C G 76.2 chr7 53687225 C 18 0 2 81 17 0 2 78 18 0 2 81 15 0 2 72 14 0 2 69 14 0 2 69 Y 32 0.659 1 -Contig61_chr7_55832923_55834065 506 T C 185.0 chr7 55833450 C 9 0 2 54 10 0 2 57 22 0 2 93 12 0 2 63 12 0 2 63 7 0 2 48 Y 1 0.019 0 -Contig91_chr8_12804505_12805470 409 C A 111.0 chr8 12804906 C 8 0 2 51 10 0 2 57 15 0 2 72 12 0 2 63 14 0 2 69 15 0 2 72 N 145 0.175 0 -Contig8_chr8_27811135_27812620 333 C T 37.9 chr8 27811458 C 4 0 2 39 11 0 2 60 18 0 2 81 5 0 2 42 6 0 2 45 5 0 2 42 Y 1 0.272 0 -Contig66_chr8_28273102_28273660 175 G C 81.6 chr8 28273263 T 9 0 2 54 17 0 2 78 19 0 2 84 8 0 2 51 16 0 2 75 19 0 2 84 Y 3 2.735 0 -Contig84_chr8_31375511_31376456 443 T C 125.0 chr8 31375954 T 10 0 2 57 15 0 2 72 27 0 2 108 18 0 2 81 16 0 2 75 9 0 2 54 Y 2 0.650 0 -Contig18_chr8_32575859_32577431 264 T C 151.0 chr8 32576124 T 20 0 2 87 14 0 2 69 17 0 2 78 14 0 2 69 13 0 2 66 14 0 2 69 Y 17 0.915 1 -Contig54_chr8_40913908_40916451 1275 G A 175.0 chr8 40915190 G 10 0 2 57 8 0 2 51 11 0 2 60 7 0 2 48 8 0 2 51 9 0 2 54 Y 21 0.056 3 -Contig93_chr8_44658786_44659075 180 T G 55.3 chr8 44658964 T 4 0 2 39 3 0 2 36 6 0 2 45 5 0 2 45 5 0 2 42 4 0 2 39 N 14 0.188 0 -Contig66_chr8_58562376_58563446 345 C G 5.74 chr8 58562721 C 14 0 2 69 12 0 2 63 9 0 2 57 10 0 2 57 9 0 2 54 10 0 2 57 Y 6 0.685 0 -Contig44_chr8_71186368_71188207 1455 G T 147.0 chr8 71187818 G 4 10 1 74 3 0 2 36 20 0 2 87 12 0 2 63 8 0 2 51 10 0 2 57 Y 88 0.036 0 -Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 
0.448 4 -Contig96_chr9_39008495_39009278 215 A C 98.7 chr9 39008708 C 7 0 2 48 13 0 2 66 28 0 2 111 16 0 2 75 17 0 2 78 17 0 2 78 Y 8 0.427 1 -Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 -Contig63_chr10_42716594_42719945 1018 A G 88.7 chr10 42717616 G 13 0 2 66 14 0 2 69 13 0 2 66 12 0 2 63 18 0 2 81 5 0 2 42 Y 25 1.740 0 -Contig22_chr10_43255307_43255570 81 C A 37.2 chr10 43255383 C 15 0 2 72 18 0 2 81 22 0 2 93 16 0 2 75 11 0 2 60 12 0 2 63 N 62 0.450 0 -Contig9_chr10_51475063_51476054 770 C T 57.3 chr10 51475839 C 6 0 2 45 16 0 2 75 16 0 2 75 13 0 2 66 9 0 2 54 9 2 2 21 N 80 0.394 0 -Contig42_chr10_53816543_53818392 1642 G A 27.5 chr10 53818172 A 7 0 2 48 13 0 2 66 17 0 2 78 14 0 2 69 19 0 2 84 16 0 2 75 N 1 0.433 0 -Contig36_chr10_53992615_53993741 229 G C 86.2 chr10 53992846 G 17 0 2 78 14 0 2 69 13 0 2 66 15 0 2 72 12 0 2 63 15 0 2 72 N 23 1.912 0 -Contig20_chr10_58141129_58141750 575 C T 46.1 chr10 58141701 C 7 0 2 48 8 0 2 51 9 0 2 54 3 0 2 36 4 0 2 39 9 0 2 54 N 1 4.264 0 -Contig26_chr10_59510973_59511899 146 C A 29.0 chr10 59511126 C 8 0 2 51 13 0 2 66 18 0 2 81 13 0 2 66 10 0 2 57 7 0 2 48 Y 208 1.077 0 -Contig72_chr11_7142765_7143772 146 G A 152.0 chr11 7142911 A 8 0 2 51 8 0 2 51 24 0 2 99 10 0 2 57 17 0 2 78 11 0 2 60 Y 90 1.137 0 -Contig9_chr11_9904571_9905983 1284 C T 151.0 chr11 9905857 C 16 0 2 75 19 0 2 84 17 0 2 78 16 0 2 75 12 0 2 63 13 1 2 44 Y 11 0.422 1 -Contig7_chr11_40017076_40017630 352 C T 46.3 chr11 40017422 C 7 0 2 48 9 0 2 54 6 0 2 45 8 0 2 51 16 0 2 75 9 0 2 54 Y 44 0.336 0 -Contig108_chr11_42953408_42955156 367 A G 89.4 chr11 42953779 A 17 0 2 78 11 0 2 60 14 0 2 69 20 0 2 87 14 0 2 69 17 0 2 78 Y 118 0.784 1 -Contig16_chr11_53408448_53408790 187 A G 153.0 chr11 53408638 A 7 0 2 48 9 0 2 54 18 0 2 81 10 0 2 57 11 0 2 60 12 0 2 63 Y 116 1.367 0 -Contig21_chr12_18403415_18404381 586 G T 34.5 chr12 18403983 - 13 0 2 66 16 0 2 75 25 0 2 102 12 0 2 63 
12 0 2 63 14 0 2 69 Y 12 0.068 0 -Contig33_chr12_19804073_19804529 178 T C 69.4 chr12 19804261 T 13 0 2 66 13 0 2 66 22 0 2 93 11 0 2 60 12 0 2 63 18 0 2 81 Y 11 1.571 0 -Contig41_chr12_25565452_25566993 475 G T 6.29 chr12 25565926 G 15 0 2 72 14 0 2 69 10 0 2 57 15 0 2 72 18 0 2 81 19 0 2 84 N 10 2.231 1 -Contig9_chr12_27204351_27204696 239 A G 145.0 chr12 27204587 A 7 0 2 48 8 0 2 51 12 0 2 63 8 0 2 51 11 0 2 60 11 0 2 60 Y 14 0.046 0 -Contig45_chr12_30548282_30550498 448 C T 124.0 chr12 30548703 - 9 0 2 54 11 0 2 60 22 0 2 93 19 0 2 84 12 0 2 63 12 0 2 63 Y 66 0.305 0 -Contig46_chr12_35571846_35572563 58 G C 83.2 chr12 35571906 G 4 0 2 39 10 0 2 57 11 0 2 60 6 0 2 45 10 0 2 57 6 0 2 45 Y 55 +99. 1 -Contig28_chr12_42075871_42076044 136 G A 134.0 chr12 42076006 A 6 0 2 45 5 0 2 42 7 0 2 48 7 0 2 48 2 0 2 33 4 0 2 39 N 3 9.479 0 -Contig16_chr12_42386141_42387454 194 A G 161.0 chr12 42386323 A 11 0 2 60 8 0 2 54 23 0 2 96 17 0 2 78 6 0 2 45 13 0 2 66 Y 7 0.927 1 -Contig42_chr12_44424628_44425829 255 A G 84.4 chr12 44424879 A 12 0 2 63 19 0 2 84 23 0 2 96 15 0 2 72 18 0 2 81 14 0 2 69 Y 18 1.190 2 -Contig10_chr12_44447953_44449698 63 C T 105.0 chr12 44448020 C 11 0 2 60 9 0 2 54 12 0 2 63 10 0 2 57 15 0 2 72 8 0 2 51 Y 31 11.791 0 -Contig5_chr12_53880670_53882675 1221 A C 99.4 chr12 53881888 A 16 0 2 75 18 0 2 81 23 0 2 96 10 0 2 57 15 0 2 72 17 0 2 78 Y 31 0.061 0 -Contig86_chr12_56715356_56716464 818 T C 166.0 chr12 56716164 T 20 0 2 87 16 0 2 75 16 0 2 75 14 0 2 69 13 0 2 66 7 0 2 48 Y 22 1.092 0 -Contig3_chr12_65021967_65024097 238 T G 92.6 chr12 65022205 T 17 0 2 78 14 0 2 69 16 0 2 75 9 0 2 54 13 0 2 66 15 0 2 72 Y 258 0.117 0 -Contig43_chr12_66499742_66500010 121 G T 41.5 chr12 66499866 G 12 0 2 63 4 0 2 39 8 0 2 51 6 0 2 45 10 0 2 57 6 0 2 45 N 42 0.421 0 -Contig14_chr12_71364692_71365311 20 A C 103.0 chr12 71364712 A 7 0 2 48 3 0 2 36 5 0 2 42 1 0 2 30 2 0 2 33 3 0 2 36 Y 35 +99. 
0 -Contig37_chr13_15910164_15910426 245 G A 32.9 chr13 -1 N 3 4 1 41 4 0 2 39 3 0 2 36 4 0 2 39 3 0 2 36 10 0 2 57 N -1 2.159 1 -Contig107_chr13_26045881_26046290 341 C G 81.4 chr13 26046230 C 16 0 2 75 20 0 2 90 14 0 2 69 15 0 2 72 9 0 2 54 9 0 2 54 Y 51 4.510 0 -Contig251_chr13_28498333_28501066 864 T G 296.0 chr13 28499180 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 5 0 2 42 6 0 2 45 Y 9 0.068 0 -Contig154_chr13_36777857_36778736 356 G A 95.5 chr13 36778225 A 6 0 2 45 11 0 2 60 11 0 2 60 9 0 2 54 13 0 2 66 8 0 2 51 Y 59 0.192 0 -Contig37_chr13_42529793_42530857 150 G T 192.0 chr13 42529926 G 18 0 2 81 14 0 2 69 16 0 2 75 14 0 2 69 8 0 2 51 11 0 2 60 N 22 0.795 5 -Contig47_chr13_47045833_47046626 257 A C 28.5 chr13 47046097 A 13 0 2 66 10 0 2 57 17 0 2 78 20 0 2 87 15 0 2 72 9 0 2 57 N 129 0.468 0 -Contig42_chr13_47730018_47730856 254 A G 75.1 chr13 47730294 A 13 0 2 66 6 0 2 45 12 0 2 63 9 0 2 54 16 0 2 75 11 0 2 63 Y 630 0.049 1 -Contig55_chr13_53467708_53468101 221 T G 132.0 chr13 53467925 T 25 0 2 102 12 0 2 63 26 0 2 105 7 0 2 48 16 0 2 75 16 0 2 75 N 20 5.717 1 -Contig49_chr13_55103679_55105532 503 G A 76.0 chr13 55104178 G 21 0 2 90 19 0 2 84 18 0 2 81 20 0 2 87 8 9 1 89 17 0 2 78 Y 20 0.259 1 -Contig66_chr13_66021813_66022244 319 C T 125.0 chr13 66022136 C 11 0 2 60 16 0 2 75 15 0 2 75 12 0 2 63 17 0 2 78 8 0 2 51 N 14 0.055 3 -Contig48_chr14_11839435_11843272 3014 A G 163.0 chr14 11842446 A 10 0 2 57 8 0 2 51 13 0 2 66 10 0 2 57 5 0 2 42 10 0 2 57 Y 31 0.908 0 -Contig9_chr14_23353717_23354432 80 G A 61.3 chr14 23353797 G 3 0 2 36 6 0 2 45 11 0 2 60 8 0 2 51 4 0 2 39 2 4 1 35 Y 11 0.444 0 -Contig14_chr14_24131180_24133488 1633 G A 131.0 chr14 24132818 G 21 0 2 90 16 0 2 75 12 0 2 63 10 0 2 57 11 0 2 60 20 0 2 87 Y 36 0.347 0 -Contig28_chr14_26905747_26909514 975 G C 3.13 chr14 26906723 G 16 0 2 75 10 0 2 57 12 0 2 63 15 0 2 72 10 0 2 57 7 0 2 48 N 287 0.117 2 -Contig14_chr14_29616948_29618316 109 G A 80.3 chr14 29617053 - 17 0 2 78 16 0 2 75 16 0 2 75 10 0 2 57 
17 0 2 78 19 0 2 84 Y 32 1.051 0 -Contig76_chr14_30028102_30029179 1046 C T 38.5 chr14 30029169 T 3 0 2 36 6 0 2 45 9 0 2 54 7 0 2 48 9 0 2 54 8 0 2 51 Y 96 +99. 0 -Contig115_chr14_31417207_31417574 259 A G 12.1 chr14 31417454 G 13 0 2 66 15 0 2 72 21 0 2 90 12 0 2 63 13 0 2 66 9 0 2 54 N 28 5.379 2 -Contig70_chr14_46653662_46653790 111 G A 46.7 chr14 46653768 G 7 0 2 48 5 0 2 42 11 0 2 60 11 0 2 60 8 0 2 51 10 0 2 57 N 21 +99. 2 -Contig43_chr14_49991855_49993511 918 A G 112.0 chr14 49992767 G 15 0 2 72 10 0 2 57 11 0 2 63 9 0 2 54 12 0 2 63 9 0 2 54 Y 6 0.314 1 -Contig64_chr14_56768376_56768902 473 C T 29.0 chr14 56768832 C 15 0 2 72 11 0 2 60 14 0 2 69 14 0 2 69 7 0 2 48 9 0 2 54 Y 91 8.281 0 -Contig60_chr15_18493036_18494316 150 G A 92.6 chr15 18493188 G 9 0 2 54 13 0 2 66 9 0 2 54 6 0 2 45 5 0 2 42 12 0 2 63 Y 45 0.125 0 -Contig59_chr15_22138344_22138535 120 G C 142.0 chr15 22138470 C 11 0 2 60 10 0 2 57 18 0 2 81 4 0 2 39 10 0 2 57 15 0 2 72 N 8 2.553 0 -Contig112_chr15_26772864_26773267 374 C T 21.6 chr15 26773244 C 4 0 2 39 4 0 2 39 5 0 2 42 2 0 2 33 4 0 2 39 3 0 2 36 N 18 +99. 0 -Contig24_chr15_26894765_26895003 155 G A 87.6 chr15 -1 N 6 0 2 45 5 0 2 42 7 0 2 48 4 0 2 39 4 0 2 39 2 0 2 33 N -1 0.178 0 -Contig2_chr15_33944796_33947182 1860 G A 99.5 chr15 33946654 G 10 0 2 57 11 0 2 60 16 0 2 75 14 0 2 69 14 0 2 69 16 0 2 75 Y 16 0.252 0 -Contig73_chr15_34690052_34691332 714 T C 130.0 chr15 34690769 T 7 0 2 48 7 0 2 48 17 0 2 78 9 0 2 54 9 0 2 54 4 0 2 39 Y 7 6.003 0 -Contig68_chr15_37747190_37747426 126 G A 130.0 chr15 37747331 G 14 0 2 69 14 0 2 69 11 0 2 63 19 0 2 84 13 0 2 66 21 0 2 90 N 229 0.255 0 -Contig104_chr15_45106954_45107158 70 A T 64.4 chr15 45107015 A 6 0 2 45 6 0 2 45 19 0 2 84 7 0 2 48 7 0 2 48 3 0 2 36 N 202 4.319 0 -Contig119_chr16_6160274_6160477 180 G A 54.8 chr16 6160457 G 7 0 2 48 6 0 2 45 12 0 2 63 3 0 2 36 11 0 2 60 10 0 2 57 N 42 +99. 
0 -Contig126_chr16_10611887_10612152 150 G T 145.0 chr16 10612037 G 14 0 2 69 9 0 2 54 11 0 2 63 8 0 2 51 8 0 2 51 11 0 2 60 N 15 0.104 6 -Contig43_chr16_20200090_20200514 70 A G 58.6 chr16 20200154 A 11 0 2 60 15 0 2 72 15 0 2 72 6 0 2 45 9 0 2 54 12 0 2 63 Y 2 0.466 1 -Contig60_chr16_28079136_28080263 588 T G 157.0 chr16 28079739 T 22 0 2 93 20 0 2 87 22 0 2 93 17 0 2 78 12 0 2 63 10 0 2 57 Y 105 5.999 1 -Contig70_chr16_33758668_33759655 104 A T 58.1 chr16 33758772 A 6 0 2 45 7 0 2 48 17 0 2 78 14 0 2 69 8 0 2 51 10 0 2 57 N 54 0.162 0 -Contig66_chr16_37935682_37935831 116 T C 99.2 chr16 37935802 C 12 0 2 63 6 0 2 45 19 0 2 84 12 0 2 63 13 0 2 66 17 0 2 78 N 266 +99. 2 -Contig16_chr16_40451506_40451643 84 A G 59.8 chr16 40451592 A 7 0 2 48 5 0 2 42 7 0 2 48 13 0 2 66 14 0 2 69 19 0 2 84 N 45 5.061 0 -Contig31_chr17_12128267_12129637 205 G A 90.5 chr17 12128484 G 7 0 2 48 6 0 2 45 6 0 2 45 11 0 2 60 7 0 2 48 4 0 2 39 Y 10 0.246 0 -Contig1_chr17_12979232_12980380 808 G T 12.3 chr17 12980028 G 18 0 2 81 12 0 2 63 21 0 2 90 13 0 2 66 22 0 2 93 18 0 2 81 Y 9 0.336 1 -Contig42_chr17_23434859_23438330 2100 C T 39.5 chr17 23436985 T 4 0 2 39 7 0 2 48 7 0 2 48 3 0 2 36 6 0 2 45 2 0 2 33 Y 25 0.344 0 -Contig63_chr17_23796320_23796814 220 A G 54.0 chr17 23796536 G 6 0 2 45 4 0 2 39 5 0 2 42 6 0 2 45 4 0 2 39 6 0 2 45 Y 139 0.067 1 -Contig76_chr17_24107434_24107834 316 T C 141.0 chr17 24107726 T 19 0 2 84 15 0 2 72 20 0 2 87 16 0 2 75 11 0 2 60 18 0 2 81 Y 30 0.175 2 -Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 -Contig59_chr17_26790302_26795045 287 C T 45.1 chr17 26790582 C 8 0 2 51 6 0 2 45 13 0 2 66 6 0 2 45 15 0 2 72 12 0 2 63 Y 75 0.019 1 -Contig99_chr17_27018324_27019378 446 G A 31.1 chr17 27018776 G 14 0 2 69 12 0 2 63 14 0 2 69 10 0 2 57 9 0 2 54 11 0 2 60 Y 13 0.290 4 -Contig125_chr17_27739115_27739410 63 G A 107.0 chr17 27739177 G 8 0 2 51 11 0 2 60 16 0 2 75 8 0 2 51 4 0 2 39 
15 0 2 72 N 100 0.819 0 -Contig115_chr17_37489899_37490101 159 G A 62.4 chr17 37490067 G 4 0 2 39 3 0 2 36 4 0 2 39 4 0 2 39 3 0 2 36 6 0 2 45 N 4 1.411 1 -Contig180_chr17_45154356_45154925 524 A G 146.0 chr17 45154886 G 7 0 2 48 9 0 2 54 7 0 2 48 9 0 2 54 4 0 2 39 8 0 2 51 Y 11 +99. 2 -Contig61_chr17_48221795_48223545 1404 T A 177.0 chr17 48223216 T 15 0 2 72 14 0 2 69 24 0 2 99 17 0 2 78 18 0 2 81 24 0 2 99 Y 161 0.633 2 -Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 -Contig229_chr18_3706523_3708577 1076 A G 83.9 chr18 3707630 A 11 0 2 60 13 0 2 66 26 0 2 105 11 0 2 60 15 0 2 72 17 0 2 78 Y 63 0.445 0 -Contig24_chr18_14049894_14050480 24 A G 123.0 chr18 14049918 A 5 0 2 42 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 5 0 2 42 Y 17 +99. 0 -Contig123_chr18_19916160_19916379 116 G A 79.2 chr18 19916272 A 14 0 2 69 12 0 2 63 14 0 2 69 6 0 2 45 11 0 2 60 10 0 2 57 N 26 0.172 0 -Contig82_chr18_27305489_27306229 566 C T 49.5 chr18 27306051 A 6 0 2 45 6 0 2 45 10 0 2 57 11 0 2 60 6 0 2 45 7 0 2 48 N 1 0.349 0 -Contig71_chr18_34324706_34326687 136 G A 151.0 chr18 34324841 G 9 0 2 54 9 0 2 54 17 0 2 78 8 0 2 51 11 0 2 60 10 0 2 57 Y 2 2.129 2 -Contig16_chr18_34672093_34673044 538 T C 58.2 chr18 34672635 T 8 0 2 51 15 0 2 72 16 0 2 75 15 0 2 72 9 0 2 57 18 0 2 81 Y 8 0.214 1 -Contig96_chr18_38492535_38493333 624 G A 119.0 chr18 38493162 T 17 0 2 78 12 0 2 63 13 0 2 66 16 0 2 75 8 0 2 51 15 0 2 72 Y 127 0.131 0 -Contig226_chr18_47753756_47754666 427 T C 21.1 chr18 47754215 T 10 0 2 57 4 0 2 39 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 42 0.522 0 -Contig170_chr18_49411558_49412230 94 C A 74.3 chr18 49411655 C 14 0 2 69 10 0 2 57 9 0 2 54 10 0 2 57 3 0 2 36 3 0 2 36 N 9 1.457 0 -Contig192_chr18_49419342_49420737 1058 C T 42.8 chr18 49420381 A 3 0 2 36 4 0 2 39 5 0 2 42 8 0 2 51 3 0 2 36 3 0 2 36 Y 34 2.107 2 -Contig64_chr18_55979770_55980315 49 G A 89.1 chr18 55979824 G 3 0 2 36 9 0 2 54 7 0 2 51 4 0 2 39 
3 0 2 36 3 0 2 36 Y -1 2.124 0 -Contig20_chr18_58130301_58130735 112 A G 74.4 chr18 58130413 A 12 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 6 0 2 45 6 0 2 45 Y 10 0.290 0 -Contig67_chr19_12398520_12399367 499 C T 161.0 chr19 12399017 C 10 0 2 57 11 0 2 60 20 0 2 87 14 0 2 69 24 0 2 99 8 0 2 51 Y 137 5.634 0 -Contig66_chr19_16285672_16287223 996 C T 190.0 chr19 16286674 C 9 0 2 57 14 0 2 69 16 0 2 78 17 0 2 78 8 0 2 51 22 0 2 93 Y 40 0.110 0 -Contig129_chr19_25541958_25542221 202 T C 68.1 chr19 25542154 C 11 0 2 60 19 0 2 84 10 0 2 60 17 0 2 78 9 0 2 54 12 0 2 63 N -1 2.551 1 -Contig29_chr19_37339947_37341911 1692 C T 211.0 chr19 37341631 C 15 0 2 72 20 0 2 87 11 0 2 60 15 0 2 72 3 0 2 36 12 0 2 63 Y 7 0.096 0 -Contig39_chr19_47709708_47711327 444 C T 36.8 chr19 47710148 T 10 0 2 57 4 0 2 39 8 0 2 51 9 0 2 54 6 0 2 45 6 0 2 45 Y 95 1.251 1 -Contig60_chr19_54013816_54014398 281 A G 138.0 chr19 54014103 C 6 0 2 45 15 0 2 72 7 0 2 48 10 0 2 57 15 0 2 72 10 0 2 57 Y 188 1.271 0 -Contig251_chr19_56559098_56559626 452 T C 3.36 chr19 56559549 T 12 0 2 63 13 0 2 66 21 0 2 90 15 0 2 72 14 0 2 69 11 0 2 60 N 1 0.117 0 -Contig50_chr20_12138509_12141975 3206 C A 248.0 chr20 12141763 C 8 0 2 51 15 0 2 72 14 0 2 69 6 0 2 45 10 0 2 57 7 0 2 48 Y 2 0.384 0 -Contig36_chr20_32631363_32632049 176 G A 24.1 chr20 32631526 G 7 0 2 48 14 0 2 69 19 0 2 84 14 0 2 69 15 0 2 72 16 0 2 75 N 50 1.150 0 -Contig32_chr20_36468058_36468869 66 C T 40.4 chr20 36468127 C 6 0 2 45 3 0 2 36 4 0 2 39 5 0 2 42 3 0 2 36 4 0 2 39 N 59 0.281 0 -Contig24_chr20_38203888_38204900 834 C T 132.0 chr20 38204731 C 9 0 2 54 17 0 2 78 20 0 2 87 8 0 2 51 11 0 2 60 17 0 2 78 Y 14 0.397 0 -Contig79_chr20_44263127_44264103 456 G T 31.5 chr20 44263573 G 22 0 2 93 16 0 2 75 15 0 2 72 19 0 2 84 13 0 2 66 26 0 2 105 Y 8 3.250 0 -Contig26_chr20_45878482_45878787 197 A G 160.0 chr20 45878672 A 17 0 2 78 15 0 2 72 11 0 2 63 17 0 2 78 12 0 2 63 10 0 2 57 N 14 0.535 0 -Contig119_chr20_46550670_46551383 609 G A 139.0 chr20 46551277 G 7 
0 2 48 17 0 2 78 19 0 2 84 20 0 2 87 9 0 2 54 15 0 2 72 Y 7 0.488 1 -Contig50_chr21_4178523_4178687 121 G A 362.0 chr21 4178640 G 8 0 2 51 14 0 2 69 5 0 2 42 3 0 2 36 11 0 2 60 4 0 2 39 N 392 0.483 0 -Contig103_chr21_10177255_10177765 121 G A 125.0 chr21 10177367 G 12 0 2 63 10 0 2 57 10 0 2 57 17 0 2 78 14 0 2 69 7 0 2 51 Y 37 0.213 3 -Contig1_chr21_10805534_10806399 766 A G 146.0 chr21 10806301 G 10 0 2 57 6 0 2 45 9 0 2 54 6 0 2 45 7 0 2 48 5 0 2 42 Y 20 0.319 0 -Contig46_chr21_21029492_21030645 443 C T 5.37 chr21 21029910 C 15 0 2 72 11 0 2 60 16 0 2 75 15 0 2 72 13 0 2 66 6 0 2 45 Y 96 3.737 0 -Contig129_chr21_31045749_31046924 381 A G 129.0 chr21 31046141 A 19 0 2 84 8 0 2 51 23 0 2 96 12 0 2 63 15 0 2 72 18 0 2 81 Y 69 0.028 2 -Contig23_chr21_31651123_31651986 840 C T 71.3 chr21 31651957 T 6 0 2 45 9 0 2 54 8 0 2 51 10 0 2 57 4 0 2 39 7 0 2 48 Y 105 2.977 3 -Contig64_chr21_43341847_43342031 84 T C 114.0 chr21 43341926 T 11 0 2 60 9 0 2 54 10 0 2 57 6 0 2 45 6 0 2 45 7 0 2 48 N 10 3.954 2 -Contig60_chr21_43475347_43475824 175 C T 8.05 chr21 43475551 T 6 0 2 45 7 0 2 48 13 0 2 66 6 0 2 45 14 0 2 69 14 0 2 69 N 45 0.058 0 -Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 0 2 66 Y 16 0.465 0 -Contig46_chr22_9416920_9417467 381 G A 145.0 chr22 9417259 G 10 0 2 57 9 0 2 54 10 0 2 57 6 0 2 45 13 0 2 66 7 0 2 48 Y 154 0.242 0 -Contig86_chr22_9440787_9441725 713 T G 119.0 chr22 9441488 G 6 0 2 45 12 0 2 63 10 0 2 57 11 0 2 60 13 0 2 66 16 0 2 75 Y 132 0.218 0 -Contig16_chr22_15636960_15637372 236 A C 9.79 chr22 15637192 T 4 0 2 39 5 0 2 42 12 0 2 63 7 0 2 48 6 0 2 45 11 0 2 60 Y 5 2.163 0 -Contig4_chr22_16114310_16114546 128 G C 101.0 chr22 16114432 G 10 0 2 57 13 0 2 66 20 0 2 87 20 0 2 87 16 0 2 75 9 0 2 54 N 19 0.526 0 -Contig23_chr22_34612023_34612568 167 C G 92.3 chr22 34612181 C 11 0 2 60 18 0 2 81 13 0 2 66 8 0 2 51 12 0 2 63 14 0 2 69 Y 7 0.409 0 -Contig4_chr22_38252245_38253712 799 A C 159.0 
chr22 38253064 A 18 0 2 81 15 0 2 72 15 0 2 72 20 0 2 87 27 0 2 108 15 0 2 72 Y 90 4.330 0 -Contig122_chr22_48412466_48414788 1888 C T 125.0 chr22 48414355 T 16 0 2 75 15 0 2 72 16 0 2 75 14 0 2 72 12 0 2 63 7 0 2 48 N 42 0.122 0 -Contig77_chr22_49764414_49764875 353 C A 148.0 chr22 49764777 C 7 4 1 65 18 0 2 81 16 0 2 75 20 0 2 87 4 3 1 52 9 4 1 67 Y 12 0.941 0 -Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 -Contig348_chr22_62406104_62406495 189 C A 134.0 chr22 62406302 A 9 0 2 54 14 0 2 69 11 0 2 60 10 0 2 57 12 0 2 63 6 0 2 45 Y 5 0.912 0 -Contig133_chr23_3525134_3526502 1223 A G 201.0 chr23 3526387 A 11 0 2 60 13 0 2 66 23 0 2 96 21 0 2 90 13 0 2 66 10 0 2 57 Y 61 1.359 0 -Contig111_chr23_7058063_7058181 107 G A 108.0 chr23 7058162 A 8 0 2 51 8 0 2 51 7 0 2 48 2 0 2 33 5 0 2 42 6 0 2 45 N 3 +99. 0 -Contig79_chr23_7844129_7844837 110 C A 141.0 chr23 7844237 T 13 0 2 66 15 0 2 72 17 0 2 78 12 0 2 63 15 0 2 72 16 0 2 75 Y 40 0.339 0 -Contig38_chr23_9201002_9201725 597 C T 155.0 chr23 9201609 T 17 0 2 78 8 0 2 51 13 0 2 66 5 0 2 42 11 0 2 60 7 0 2 48 Y 167 0.633 1 -Contig33_chr23_20672540_20674320 347 T A 91.4 chr23 20672885 A 11 0 2 60 14 0 2 69 15 0 2 72 7 0 2 48 12 0 2 63 18 0 2 81 Y 31 0.452 1 -Contig35_chr23_28447813_28449115 70 T A 21.3 chr23 28447881 T 9 0 2 54 8 0 2 51 10 0 2 57 9 0 2 54 10 0 2 57 12 0 2 63 N 251 0.163 1 -Contig51_chr23_30590939_30591162 140 C T 142.0 chr23 30591080 C 14 0 2 69 4 0 2 39 10 0 2 57 12 0 2 63 14 0 2 69 4 0 2 39 N 13 1.658 0 -Contig57_chr23_32216351_32216721 179 T G 143.0 chr23 32216534 T 15 0 2 72 15 0 2 72 23 0 2 96 13 0 2 66 16 0 2 75 15 0 2 72 N 32 1.387 1 -Contig93_chr23_35744841_35745791 40 A T 30.4 chr23 35744880 T 6 0 2 45 7 0 2 48 7 0 2 48 2 0 2 33 5 0 2 42 5 0 2 42 Y 50 2.173 0 -Contig32_chr23_48285289_48286638 186 T C 176.0 chr23 48285470 T 18 0 2 81 12 0 2 63 16 0 2 75 13 0 2 66 9 0 2 54 9 0 2 54 Y 4 4.238 1 
-Contig50_chr24_22515247_22516072 761 C T 243.0 chr24 22515981 T 11 0 2 60 10 0 2 57 8 0 2 51 9 0 2 54 18 0 2 81 8 0 2 51 Y 1 0.190 0 -Contig84_chr24_29196623_29199644 466 C T 126.0 chr24 29197091 T 7 0 2 48 11 0 2 60 8 0 2 51 7 0 2 48 11 0 2 60 15 0 2 72 Y 42 0.215 0 -Contig145_chr24_34778364_34778898 163 T C 372.0 chr24 34778541 C 10 0 2 57 8 0 2 51 12 0 2 63 12 0 2 63 6 1 2 31 7 0 2 48 Y 40 0.037 0 -Contig34_chr24_36147443_36150244 2679 C T 140.0 chr24 36150125 C 13 0 2 66 7 0 2 48 14 0 2 69 14 0 2 69 10 0 2 57 13 0 2 66 N 282 0.099 1 -Contig164_chr24_46598127_46599206 84 C T 105.0 chr24 46598214 C 13 0 2 66 12 0 2 63 15 0 2 72 15 0 2 72 11 0 2 60 8 0 2 51 Y 22 1.262 1 -Contig144_chr25_4011170_4013134 541 A G 160.0 chr25 4011690 A 12 0 2 63 17 0 2 78 13 0 2 66 13 0 2 66 13 0 2 66 13 0 2 66 Y 5 0.087 0 -Contig81_chr25_6103472_6104760 699 G A 378.0 chr25 6104190 A 14 0 2 69 16 0 2 75 13 0 2 66 11 0 2 60 11 0 2 60 12 0 2 63 Y 33 0.789 2 -Contig152_chr25_7486442_7487609 75 A G 11.6 chr25 7486515 A 17 0 2 78 13 0 2 66 8 0 2 51 16 0 2 75 8 0 2 51 6 0 2 45 N 2 0.158 0 -Contig24_chr25_7695778_7698612 2714 C T 130.0 chr25 7698446 C 16 0 2 75 13 0 2 66 22 0 2 93 17 0 2 78 10 0 2 57 17 0 2 78 Y 27 0.346 0 -Contig89_chr25_8635170_8636009 586 G C 209.0 chr25 8635744 G 13 0 2 66 13 0 2 66 21 0 2 93 14 0 2 69 15 0 2 72 15 0 2 72 Y 14 0.067 0 -Contig59_chr25_18196776_18197707 785 G A 112.0 chr25 18197551 G 8 10 1 42 27 0 2 108 21 0 2 90 18 0 2 81 10 0 2 57 14 0 2 69 N 36 3.625 0 -Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 -Contig84_chr25_42407960_42408708 55 C T 119.0 chr25 42408013 C 6 0 2 45 9 0 2 54 11 0 2 60 9 0 2 54 7 0 2 48 8 0 2 51 Y 11 0.121 0 -Contig73_chr25_43562500_43564110 955 T C 52.1 chr25 43563469 C 9 0 2 57 4 0 2 39 6 0 2 45 5 0 2 42 7 0 2 48 10 0 2 57 Y 4 1.406 0 -Contig37_chr25_51074433_51074885 170 A G 102.0 chr25 51074589 G 11 0 2 60 7 0 2 48 6 0 2 45 15 0 2 72 9 0 2 
54 7 0 2 48 Y 68 0.207 1 -Contig204_chr26_4311195_4311778 170 C T 16.9 chr26 4311363 T 20 0 2 87 8 0 2 51 13 0 2 66 18 0 2 81 11 0 2 60 14 0 2 69 N 35 0.085 0 -Contig122_chr26_7622321_7623491 106 C G 139.0 chr26 7622423 C 3 0 2 36 9 0 2 54 10 0 2 57 12 0 2 63 9 0 2 54 5 0 2 42 N 19 0.458 0 -Contig11_chr26_11062142_11062902 707 C A 108.0 chr26 11062836 T 7 0 2 48 8 0 2 51 16 0 2 75 10 0 2 57 6 0 2 45 14 0 2 69 Y -1 4.709 0 -Contig133_chr26_17695661_17696368 39 T G 98.7 chr26 17695700 T 10 0 2 57 3 0 2 36 11 0 2 60 9 0 2 54 2 0 2 33 1 0 2 30 N 85 3.402 0 -Contig146_chr26_26622638_26623906 574 G A 186.0 chr26 26623219 A 11 0 2 60 12 0 2 63 9 0 2 54 11 0 2 60 9 0 2 54 12 0 2 63 Y 1 0.318 0 -Contig8_chr26_27834126_27834326 140 G A 41.7 chr26 27834268 G 13 0 2 66 7 0 2 48 13 0 2 66 11 0 2 60 12 0 2 63 6 0 2 45 N 29 0.142 1 -Contig78_chr26_31128839_31129005 123 T C 145.0 chr26 -1 N 11 0 2 60 3 0 2 36 7 0 2 48 8 0 2 51 10 0 2 46 7 0 2 48 N -1 1.230 1 -Contig28_chr26_32935355_32935833 289 T C 77.9 chr26 32935638 T 15 0 2 72 22 0 2 93 15 0 2 72 9 0 2 54 15 0 2 72 17 0 2 78 Y 10 2.258 1 -Contig135_chr27_6853874_6854079 158 C T 116.0 chr27 6854032 T 18 0 2 81 19 0 2 84 13 0 2 66 7 0 2 48 8 0 2 51 11 0 2 60 N 4 0.060 1 -Contig47_chr27_11777710_11777915 25 A G 67.3 chr27 11777731 A 3 0 2 36 5 0 2 42 6 0 2 45 10 0 2 57 9 0 2 54 6 0 2 45 N 97 +99. 0 -Contig23_chr27_14633002_14633153 23 G A 128.0 chr27 14633023 A 3 0 2 36 4 0 2 39 5 0 2 42 5 0 2 42 3 0 2 36 2 0 2 33 N 240 3.881 0 -Contig29_chr27_15428166_15429413 380 T C 140.0 chr27 15428539 T 15 0 2 72 15 0 2 72 17 0 2 78 15 0 2 72 15 0 2 72 15 0 2 72 Y 47 0.916 1 -Contig31_chr27_19519489_19520891 129 G T 14.9 chr27 19519624 T 12 0 2 63 19 0 2 84 20 0 2 87 16 0 2 75 10 0 2 57 11 0 2 60 Y 48 2.756 0 -Contig35_chr27_40596169_40596445 20 G C 133.0 chr27 40596189 G 8 0 2 51 3 0 2 36 4 0 2 39 2 0 2 33 4 0 2 39 4 0 2 39 Y 4 +99. 
1 -Contig85_chr27_45471750_45472022 211 G A 53.1 chr27 45471964 G 18 0 2 81 10 0 2 57 15 0 2 72 0 13 0 36 16 0 2 75 14 0 2 69 N 75 2.502 1 -Contig131_chr28_6481806_6483783 138 C T 36.2 chr28 6481953 C 12 0 2 63 12 0 2 63 20 0 2 87 11 0 2 60 10 0 2 57 12 0 2 63 Y 10 0.387 0 -Contig141_chr28_10027332_10028242 780 T G 74.8 chr28 10028095 T 10 0 2 57 11 0 2 60 14 0 2 69 10 0 2 57 7 0 2 48 9 0 2 54 Y 19 3.348 0 -Contig144_chr28_15468203_15470548 743 G A 20.0 chr28 15468942 G 13 0 2 66 12 0 2 63 10 0 2 57 11 0 2 60 16 0 2 75 7 0 2 48 N 14 0.053 0 -Contig47_chr28_21311718_21312366 541 G A 116.0 chr28 21312258 G 9 0 2 54 6 0 2 45 12 0 2 63 6 0 2 45 5 0 2 45 12 0 2 63 N 9 0.240 0 -Contig60_chr28_30197166_30197364 92 T C 164.0 chr28 30197258 T 10 0 2 57 13 0 2 66 15 0 2 72 16 0 2 75 12 0 2 63 11 0 2 60 N 369 1.139 0 -Contig29_chr29_4726399_4727143 559 A T 163.0 chr29 4726955 A 15 0 2 72 18 0 2 81 18 0 2 81 16 0 2 75 11 0 2 60 14 0 2 72 Y 161 3.114 0 -Contig48_chr29_13129286_13130137 232 A G 92.2 chr29 13129514 G 13 0 2 66 11 0 2 60 19 0 2 84 16 0 2 75 11 0 2 60 17 0 2 78 Y 337 2.581 1 -Contig33_chr29_17000374_17000921 71 C T 48.6 chr29 17000441 - 4 0 2 39 9 0 2 54 12 0 2 66 10 0 2 57 7 0 2 48 4 0 2 39 N 26 5.491 0 -Contig34_chr29_17581796_17584016 2105 C T 126.0 chr29 17583890 T 14 0 2 69 11 0 2 60 18 0 2 81 12 0 2 63 10 0 2 57 10 0 2 57 Y 22 2.208 0 -Contig19_chr29_20976080_20977761 1007 G A 115.0 chr29 20977076 G 19 0 2 84 22 0 2 93 22 0 2 93 22 0 2 93 11 0 2 60 13 0 2 66 Y 4 1.915 0 -Contig51_chr29_21149853_21150467 266 C T 146.0 chr29 21150118 C 12 0 2 63 12 0 2 63 23 0 2 96 14 0 2 69 13 0 2 66 10 0 2 57 Y 4 0.051 0 -Contig1_chr30_5992217_5993068 106 C T 129.0 chr30 5992319 C 10 0 2 57 11 0 2 60 7 0 2 48 11 0 2 60 10 0 2 57 12 0 2 63 Y 76 1.079 0 -Contig1_chr30_8232878_8233406 402 C T 127.0 chr30 8233264 C 8 0 2 51 19 0 2 84 16 0 2 75 18 0 2 81 10 0 2 57 14 0 2 69 Y 358 5.283 0 -Contig108_chr30_9436961_9437520 546 C T 39.8 chr30 9437502 C 7 0 2 48 5 0 2 42 2 0 2 33 7 0 2 
48 5 0 2 42 7 0 2 48 Y 64 +99. 0 -Contig165_chr30_25804389_25804926 190 T C 126.0 chr30 25804592 C 3 0 2 36 8 0 2 51 7 0 2 48 10 0 2 57 7 0 2 48 4 0 2 39 Y 113 0.329 0 -Contig193_chr30_27495616_27496125 434 C A 234.0 chr30 27496024 C 13 0 2 66 16 0 2 75 25 0 2 102 16 0 2 75 13 0 2 66 14 0 2 69 Y 76 2.621 0 -Contig38_chr31_5164423_5166573 2074 C T 134.0 chr31 5166501 T 13 0 2 66 10 0 2 57 17 0 2 78 11 0 2 60 17 0 2 78 10 0 2 57 Y 58 +99. 0 -Contig6_chr31_9649308_9650149 431 G T 162.0 chr31 9649742 G 31 0 2 120 23 0 2 96 17 0 2 78 17 0 2 78 10 0 2 57 16 0 2 75 Y 98 2.200 0 -Contig7_chr31_12384974_12386400 305 C T 69.6 chr31 12385267 C 6 0 2 45 10 0 2 57 11 0 2 60 11 0 2 60 9 0 2 54 12 0 2 63 Y 44 1.165 0 -Contig90_chr31_17267583_17267778 81 C A 143.0 chr31 17267665 C 20 0 2 87 6 0 2 45 14 0 2 72 22 0 2 93 17 0 2 78 15 0 2 72 N 7 0.565 0 -Contig137_chr31_23357653_23358568 885 G A 119.0 chr31 23358545 G 5 0 2 42 3 0 2 36 3 0 2 36 2 0 2 33 3 0 2 36 4 0 2 39 Y 11 +99. 0 -Contig17_chr31_26433828_26434459 498 T C 9.79 chr31 26434322 T 18 0 2 81 10 0 2 57 15 0 2 72 13 0 2 66 16 0 2 75 15 0 2 72 Y 137 4.814 0 -Contig30_chr32_25902721_25905783 208 C G 162.0 chr32 25902927 G 11 0 2 60 13 0 2 66 11 0 2 60 12 0 2 63 7 0 2 48 11 0 2 60 Y 145 0.322 2 -Contig42_chr32_38900713_38901320 320 A G 134.0 chr32 38901021 T 12 0 2 63 10 0 2 57 9 11 1 104 5 0 2 42 19 0 2 84 7 6 1 56 Y 71 0.165 0 -Contig18_chr33_22207246_22209159 1363 G T 51.5 chr33 22208619 - 16 0 2 75 8 0 2 51 11 0 2 60 10 0 2 57 15 0 2 72 12 0 2 63 Y 59 2.560 0 -Contig104_chr33_22483642_22484187 424 C T 140.0 chr33 22484054 T 13 0 2 66 16 0 2 75 9 0 2 54 15 0 2 72 13 0 2 66 10 0 2 57 Y 36 0.404 0 -Contig170_chr33_26189421_26189940 292 T C 98.4 chr33 26189703 T 21 0 2 90 13 0 2 66 15 0 2 72 13 0 2 66 19 0 2 84 13 0 2 66 Y 23 0.307 0 -Contig41_chr34_16544482_16545449 46 T C 102.0 chr34 16544523 T 5 0 2 42 11 0 2 60 6 0 2 45 0 2 0 3 7 0 2 48 8 0 2 51 Y 215 1.156 0 -Contig8_chr34_18474513_18475673 1122 C A 129.0 chr34 18475628 
A 8 0 2 51 15 0 2 72 13 0 2 66 17 0 2 78 13 0 2 66 6 0 2 45 Y 61 0.123 2 -Contig152_chr34_31794848_31795540 242 G A 93.2 chr34 31795093 G 11 0 2 60 24 0 2 99 17 0 2 78 15 0 2 72 18 0 2 81 17 0 2 78 Y 123 2.780 0 -Contig28_chr34_41708848_41712034 1381 A G 78.2 chr34 41710232 A 11 0 2 60 17 0 2 78 15 0 2 72 16 0 2 75 15 0 2 72 14 0 2 69 Y 236 0.234 0 -Contig85_chr34_42798284_42800584 1845 C T 171.0 chr34 42800126 T 5 0 2 42 7 0 2 48 6 0 2 45 7 0 2 48 6 0 2 45 2 0 2 33 Y 5 2.787 0 -Contig47_chr35_3666773_3667898 348 G T 124.0 chr35 3667121 G 9 0 2 54 20 0 2 87 18 0 2 81 15 0 2 72 12 0 2 63 14 0 2 69 Y 285 0.235 0 -Contig195_chr35_15722500_15722741 205 G A 4.08 chr35 15722718 G 3 0 2 36 5 0 2 42 1 0 2 30 6 0 2 45 1 0 2 30 1 0 2 30 N 43 +99. 0 -Contig101_chr35_19513178_19513697 62 C T 112.0 chr35 19513238 C 12 0 2 63 7 0 2 48 13 0 2 66 7 0 2 48 5 0 2 42 8 0 2 51 N 115 3.135 0 -Contig47_chr35_24382042_24382526 33 G A 87.0 chr35 24382076 G 5 0 2 42 4 0 2 39 6 0 2 45 7 0 2 48 4 0 2 39 2 0 2 33 Y 71 +99. 
0 -Contig77_chr35_24796947_24797172 65 A G 52.1 chr35 24797009 A 7 0 2 48 5 0 2 42 8 0 2 51 6 0 2 45 12 0 2 63 10 0 2 57 N 11 1.401 3 -Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 -Contig5_chr36_4562983_4563634 343 C T 151.0 chr36 4563324 T 20 0 2 87 20 0 2 87 23 0 2 96 24 0 2 99 9 0 2 54 8 0 2 51 Y 40 1.169 0 -Contig75_chr36_7885319_7885588 53 G A 25.7 chr36 7885372 G 10 0 2 57 8 0 2 51 13 0 2 66 7 0 2 48 4 0 2 39 7 0 2 48 N 7 2.653 0 -Contig184_chr36_18956191_18958552 187 A G 11.5 chr36 18956371 G 10 0 2 57 11 0 2 60 21 0 2 90 14 0 2 69 7 0 2 48 4 0 2 39 N 278 1.434 2 -Contig12_chr36_21557176_21557828 513 T A 159.0 chr36 21557695 A 11 0 2 60 14 0 2 69 21 0 2 90 12 0 2 63 15 0 2 72 11 0 2 60 Y 55 0.222 0 -Contig2_chr36_22436067_22436794 653 C T 73.0 chr36 22436730 C 11 0 2 60 16 0 2 75 13 0 2 66 11 0 2 60 21 0 2 90 21 0 2 90 Y 9 0.534 0 -Contig133_chr36_32954045_32955409 136 A G 116.0 chr36 32954182 A 16 0 2 75 15 0 2 72 20 0 2 87 11 0 2 60 18 0 2 81 13 0 2 66 Y 74 3.772 1 -Contig53_chr37_6665763_6665919 116 C T 111.0 chr37 6665875 C 9 0 2 54 9 0 2 54 5 0 2 42 9 0 2 54 8 0 2 51 10 0 2 57 N 15 10.875 1 -Contig42_chr37_9589176_9591269 252 G A 25.1 chr37 9589430 G 10 0 2 40 13 0 2 66 18 0 2 81 21 0 2 90 9 0 2 54 17 0 2 78 N 67 1.170 2 -Contig2_chr37_17134963_17136513 1140 A C 158.0 chr37 17136092 A 14 0 2 69 24 0 2 99 17 0 2 78 16 0 2 75 15 0 2 75 13 0 2 66 Y 12 0.053 1 -Contig18_chr37_17147806_17149851 291 T G 112.0 chr37 17148084 T 4 6 1 45 16 0 2 75 17 0 2 78 14 0 2 69 22 0 2 93 13 0 2 66 Y 41 4.442 0 -Contig64_chr37_17606895_17607534 565 C T 30.2 chr37 17607439 A 9 0 2 54 16 0 2 75 20 0 2 87 14 0 2 69 16 0 2 75 10 0 2 57 N 20 1.622 0 -Contig126_chr37_21587881_21590621 373 G T 132.0 chr37 21588256 G 11 0 2 60 11 0 2 60 23 0 2 96 12 0 2 63 8 0 2 51 18 0 2 81 Y 12 0.549 0 -Contig2_chr37_31197993_31198256 182 C T 39.6 chr37 31198171 T 6 0 2 45 10 0 2 57 7 0 2 48 9 0 2 54 10 0 
2 57 12 0 2 63 N 2 0.595 0 -Contig46_chr37_31852376_31853555 825 A G 111.0 chr37 31853191 G 19 0 2 84 14 0 2 69 15 0 2 72 7 0 2 48 8 0 2 51 16 0 2 75 Y 17 0.128 1 -Contig7_chr38_12217200_12218387 1163 A T 44.4 chr38 12218353 A 11 0 2 60 13 0 2 66 17 0 2 78 10 0 2 57 11 0 2 60 11 0 2 60 Y 67 +99. 0 -Contig15_chr38_12282020_12282253 150 C T 156.0 chr38 12282164 A 17 0 2 78 11 0 2 60 19 0 2 84 14 0 2 69 5 0 2 42 14 0 2 69 Y 26 2.952 1 -Contig6_chr38_16185744_16186110 325 A G 74.9 chr38 16186061 A 5 0 2 42 3 0 2 36 9 0 2 54 7 0 2 48 1 0 2 30 12 0 2 63 Y 40 +99. 0 -Contig265_chrX_2689247_2689484 114 C G 103.0 chrX 2689356 C 11 0 2 60 9 0 2 54 13 0 2 66 16 0 2 75 14 0 2 69 10 0 2 57 N 2 9.232 1 -Contig122_chrX_6026976_6027327 330 C T 79.4 chrX 6027303 C 3 0 2 36 3 0 2 36 3 0 2 36 4 0 2 39 3 0 2 36 6 0 2 45 Y 30 +99. 0 -Contig113_chrX_26287829_26288398 385 C T 59.6 chrX 26288213 C 9 0 2 54 9 0 2 54 17 0 2 78 11 0 2 60 3 8 1 44 4 0 2 39 N 13 0.077 0 -Contig237_chrX_31256648_31257654 165 T A 246.0 chrX 31256814 T 7 0 2 48 23 0 2 96 19 0 2 84 17 0 2 78 14 0 2 69 8 0 2 51 Y 37 1.481 0 -Contig90_chrX_57430715_57431566 548 C T 116.0 chrX 57431266 T 9 0 2 54 18 0 2 81 13 0 2 66 14 0 2 69 8 0 2 54 7 0 2 48 Y 261 0.154 1 -Contig133_chrX_84833782_84834125 182 G A 69.7 chrX 84833962 G 5 0 2 42 18 0 2 81 12 0 2 63 19 0 2 84 6 3 1 27 7 0 2 48 N 619 0.278 0 -Contig125_chrX_93319363_93320877 349 A C 145.0 chrX 93319721 A 4 0 2 39 6 0 2 45 11 0 2 60 10 0 2 57 13 0 2 66 6 0 2 45 Y 59 1.686 0
--- a/test-data/test_out/pca/admix.gd_indivs Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -PB1 M All_Individuals -PB2 M All_Individuals -PB3 M All_Individuals -PB4 M All_Individuals -PB6 M All_Individuals -PB8 M All_Individuals
--- a/test-data/test_out/pca/admix.gd_snp Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,303 +0,0 @@ - snp1 11 0.002 2000 A T - snp3 11 0.002 2000 A T - snp4 11 0.002 2000 A T - snp5 11 0.002 2000 A T - snp6 11 0.002 2000 A T - snp7 11 0.002 2000 A T - snp8 11 0.002 2000 A T - snp9 11 0.002 2000 A T - snp10 11 0.002 2000 A T - snp11 11 0.002 2000 A T - snp12 11 0.002 2000 A T - snp13 11 0.002 2000 A T - snp14 11 0.002 2000 A T - snp16 11 0.002 2000 A T - snp17 11 0.002 2000 A T - snp22 11 0.002 2000 A T - snp24 11 0.002 2000 A T - snp25 11 0.002 2000 A T - snp27 11 0.002 2000 A T - snp28 11 0.002 2000 A T - snp29 11 0.002 2000 A T - snp30 11 0.002 2000 A T - snp31 11 0.002 2000 A T - snp33 11 0.002 2000 A T - snp34 11 0.002 2000 A T - snp37 11 0.002 2000 A T - snp38 11 0.002 2000 A T - snp39 11 0.002 2000 A T - snp40 11 0.002 2000 A T - snp41 11 0.002 2000 A T - snp42 11 0.002 2000 A T - snp43 11 0.002 2000 A T - snp45 11 0.002 2000 A T - snp46 11 0.002 2000 A T - snp47 11 0.002 2000 A T - snp48 11 0.002 2000 A T - snp49 11 0.002 2000 A T - snp50 11 0.002 2000 A T - snp51 11 0.002 2000 A T - snp52 11 0.002 2000 A T - snp53 11 0.002 2000 A T - snp54 11 0.002 2000 A T - snp56 11 0.002 2000 A T - snp58 11 0.002 2000 A T - snp59 11 0.002 2000 A T - snp60 11 0.002 2000 A T - snp61 11 0.002 2000 A T - snp62 11 0.002 2000 A T - snp63 11 0.002 2000 A T - snp64 11 0.002 2000 A T - snp65 11 0.002 2000 A T - snp67 11 0.002 2000 A T - snp68 11 0.002 2000 A T - snp70 11 0.002 2000 A T - snp71 11 0.002 2000 A T - snp72 11 0.002 2000 A T - snp73 11 0.002 2000 A T - snp74 11 0.002 2000 A T - snp75 11 0.002 2000 A T - snp76 11 0.002 2000 A T - snp77 11 0.002 2000 A T - snp78 11 0.002 2000 A T - snp80 11 0.002 2000 A T - snp81 11 0.002 2000 A T - snp83 11 0.002 2000 A T - snp84 11 0.002 2000 A T - snp87 11 0.002 2000 A T - snp89 11 0.002 2000 A T - snp90 11 0.002 2000 A T - snp91 11 0.002 2000 A T - snp92 11 0.002 2000 A T - snp93 11 0.002 2000 A T 
- snp94 11 0.002 2000 A T - snp98 11 0.002 2000 A T - snp100 11 0.002 2000 A T - snp101 11 0.002 2000 A T - snp102 11 0.002 2000 A T - snp103 11 0.002 2000 A T - snp104 11 0.002 2000 A T - snp105 11 0.002 2000 A T - snp106 11 0.002 2000 A T - snp107 11 0.002 2000 A T - snp108 11 0.002 2000 A T - snp110 11 0.002 2000 A T - snp111 11 0.002 2000 A T - snp112 11 0.002 2000 A T - snp113 11 0.002 2000 A T - snp116 11 0.002 2000 A T - snp117 11 0.002 2000 A T - snp118 11 0.002 2000 A T - snp119 11 0.002 2000 A T - snp121 11 0.002 2000 A T - snp122 11 0.002 2000 A T - snp123 11 0.002 2000 A T - snp124 11 0.002 2000 A T - snp125 11 0.002 2000 A T - snp126 11 0.002 2000 A T - snp128 11 0.002 2000 A T - snp129 11 0.002 2000 A T - snp131 11 0.002 2000 A T - snp133 11 0.002 2000 A T - snp134 11 0.002 2000 A T - snp135 11 0.002 2000 A T - snp137 11 0.002 2000 A T - snp138 11 0.002 2000 A T - snp139 11 0.002 2000 A T - snp140 11 0.002 2000 A T - snp141 11 0.002 2000 A T - snp143 11 0.002 2000 A T - snp145 11 0.002 2000 A T - snp146 11 0.002 2000 A T - snp148 11 0.002 2000 A T - snp149 11 0.002 2000 A T - snp150 11 0.002 2000 A T - snp151 11 0.002 2000 A T - snp152 11 0.002 2000 A T - snp153 11 0.002 2000 A T - snp154 11 0.002 2000 A T - snp156 11 0.002 2000 A T - snp157 11 0.002 2000 A T - snp158 11 0.002 2000 A T - snp159 11 0.002 2000 A T - snp160 11 0.002 2000 A T - snp161 11 0.002 2000 A T - snp162 11 0.002 2000 A T - snp164 11 0.002 2000 A T - snp165 11 0.002 2000 A T - snp167 11 0.002 2000 A T - snp168 11 0.002 2000 A T - snp169 11 0.002 2000 A T - snp170 11 0.002 2000 A T - snp171 11 0.002 2000 A T - snp172 11 0.002 2000 A T - snp174 11 0.002 2000 A T - snp175 11 0.002 2000 A T - snp176 11 0.002 2000 A T - snp177 11 0.002 2000 A T - snp178 11 0.002 2000 A T - snp179 11 0.002 2000 A T - snp181 11 0.002 2000 A T - snp182 11 0.002 2000 A T - snp183 11 0.002 2000 A T - snp184 11 0.002 2000 A T - snp185 11 0.002 2000 A T - snp186 11 0.002 2000 A T - snp188 11 0.002 2000 A T - 
snp191 11 0.002 2000 A T - snp192 11 0.002 2000 A T - snp193 11 0.002 2000 A T - snp195 11 0.002 2000 A T - snp196 11 0.002 2000 A T - snp197 11 0.002 2000 A T - snp199 11 0.002 2000 A T - snp200 11 0.002 2000 A T - snp201 11 0.002 2000 A T - snp202 11 0.002 2000 A T - snp203 11 0.002 2000 A T - snp205 11 0.002 2000 A T - snp207 11 0.002 2000 A T - snp210 11 0.002 2000 A T - snp211 11 0.002 2000 A T - snp212 11 0.002 2000 A T - snp213 11 0.002 2000 A T - snp214 11 0.002 2000 A T - snp215 11 0.002 2000 A T - snp216 11 0.002 2000 A T - snp217 11 0.002 2000 A T - snp218 11 0.002 2000 A T - snp219 11 0.002 2000 A T - snp220 11 0.002 2000 A T - snp221 11 0.002 2000 A T - snp223 11 0.002 2000 A T - snp224 11 0.002 2000 A T - snp225 11 0.002 2000 A T - snp226 11 0.002 2000 A T - snp227 11 0.002 2000 A T - snp228 11 0.002 2000 A T - snp229 11 0.002 2000 A T - snp230 11 0.002 2000 A T - snp231 11 0.002 2000 A T - snp232 11 0.002 2000 A T - snp235 11 0.002 2000 A T - snp236 11 0.002 2000 A T - snp237 11 0.002 2000 A T - snp239 11 0.002 2000 A T - snp240 11 0.002 2000 A T - snp241 11 0.002 2000 A T - snp242 11 0.002 2000 A T - snp243 11 0.002 2000 A T - snp244 11 0.002 2000 A T - snp246 11 0.002 2000 A T - snp247 11 0.002 2000 A T - snp248 11 0.002 2000 A T - snp249 11 0.002 2000 A T - snp250 11 0.002 2000 A T - snp251 11 0.002 2000 A T - snp252 11 0.002 2000 A T - snp253 11 0.002 2000 A T - snp254 11 0.002 2000 A T - snp255 11 0.002 2000 A T - snp256 11 0.002 2000 A T - snp257 11 0.002 2000 A T - snp258 11 0.002 2000 A T - snp260 11 0.002 2000 A T - snp261 11 0.002 2000 A T - snp262 11 0.002 2000 A T - snp263 11 0.002 2000 A T - snp264 11 0.002 2000 A T - snp265 11 0.002 2000 A T - snp266 11 0.002 2000 A T - snp267 11 0.002 2000 A T - snp268 11 0.002 2000 A T - snp269 11 0.002 2000 A T - snp270 11 0.002 2000 A T - snp271 11 0.002 2000 A T - snp273 11 0.002 2000 A T - snp274 11 0.002 2000 A T - snp275 11 0.002 2000 A T - snp276 11 0.002 2000 A T - snp277 11 0.002 2000 A T - 
snp278 11 0.002 2000 A T - snp281 11 0.002 2000 A T - snp282 11 0.002 2000 A T - snp284 11 0.002 2000 A T - snp287 11 0.002 2000 A T - snp288 11 0.002 2000 A T - snp289 11 0.002 2000 A T - snp290 11 0.002 2000 A T - snp291 11 0.002 2000 A T - snp292 11 0.002 2000 A T - snp293 11 0.002 2000 A T - snp294 11 0.002 2000 A T - snp297 11 0.002 2000 A T - snp298 11 0.002 2000 A T - snp299 11 0.002 2000 A T - snp300 11 0.002 2000 A T - snp301 11 0.002 2000 A T - snp302 11 0.002 2000 A T - snp303 11 0.002 2000 A T - snp304 11 0.002 2000 A T - snp307 11 0.002 2000 A T - snp308 11 0.002 2000 A T - snp309 11 0.002 2000 A T - snp310 11 0.002 2000 A T - snp312 11 0.002 2000 A T - snp313 11 0.002 2000 A T - snp316 11 0.002 2000 A T - snp317 11 0.002 2000 A T - snp320 11 0.002 2000 A T - snp321 11 0.002 2000 A T - snp322 11 0.002 2000 A T - snp323 11 0.002 2000 A T - snp324 11 0.002 2000 A T - snp325 11 0.002 2000 A T - snp328 11 0.002 2000 A T - snp329 11 0.002 2000 A T - snp331 11 0.002 2000 A T - snp332 11 0.002 2000 A T - snp333 11 0.002 2000 A T - snp334 11 0.002 2000 A T - snp335 11 0.002 2000 A T - snp336 11 0.002 2000 A T - snp338 11 0.002 2000 A T - snp339 11 0.002 2000 A T - snp341 11 0.002 2000 A T - snp342 11 0.002 2000 A T - snp344 11 0.002 2000 A T - snp345 11 0.002 2000 A T - snp348 11 0.002 2000 A T - snp350 11 0.002 2000 A T - snp352 11 0.002 2000 A T - snp353 11 0.002 2000 A T - snp354 11 0.002 2000 A T - snp355 11 0.002 2000 A T - snp360 11 0.002 2000 A T - snp361 11 0.002 2000 A T - snp362 11 0.002 2000 A T - snp364 11 0.002 2000 A T - snp366 11 0.002 2000 A T - snp369 11 0.002 2000 A T - snp370 11 0.002 2000 A T - snp371 11 0.002 2000 A T - snp372 11 0.002 2000 A T - snp373 11 0.002 2000 A T - snp374 11 0.002 2000 A T - snp375 11 0.002 2000 A T - snp376 11 0.002 2000 A T - snp377 11 0.002 2000 A T - snp378 11 0.002 2000 A T - snp379 11 0.002 2000 A T - snp380 11 0.002 2000 A T - snp381 11 0.002 2000 A T - snp382 11 0.002 2000 A T - snp383 11 0.002 2000 A T - 
snp384 11 0.002 2000 A T - snp385 11 0.002 2000 A T - snp386 11 0.002 2000 A T - snp389 11 0.002 2000 A T - snp390 11 0.002 2000 A T - snp393 11 0.002 2000 A T - snp395 11 0.002 2000 A T - snp397 11 0.002 2000 A T - snp400 11 0.002 2000 A T
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/admix.geno Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,303 @@ +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 +222222 +222222 +222222 +222222 +222222 +222222 +222212 +222222 +222222 +222221 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +212222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122211 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 
+222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222022 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +221221 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222122 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +122222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222222 +222212 +222222 +222222 +222222
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/coordinates.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,7 @@ + #eigvals: 3.243 1.103 + PB1 0.1887 0.4703 All_Individuals + PB2 0.0398 0.0455 All_Individuals + PB3 0.1647 -0.6945 All_Individuals + PB4 -0.8954 -0.0220 All_Individuals + PB6 0.1887 0.4703 All_Individuals + PB8 0.3135 -0.2696 All_Individuals
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/explained.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,4 @@ +Percentage explained by eigenvectors: +1: 64.9% +2: 22.1% +3: 13.1%
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/par.admix Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,7 @@ +genotypename: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.geno +snpname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.snp +indivname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.ind +evecoutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/coordinates.txt +evaloutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.eval +altnormstyle: NO +numoutevec: 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/pca/pca.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,37 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>PCA Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:19:05 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="PCA.pdf">PCA.pdf</a></li> + <li><a href="coordinates.txt">coordinates.txt</a></li> + <li><a href="explained.txt">explained.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li><a href="par.admix">par.admix</a></li> + <li><a href="admix.geno">admix.geno</a></li> + <li><a href="admix.snp">admix.snp</a></li> + <li><a href="admix.ind">admix.ind</a></li> + </ul> + </div> + <div id="gd_misc"> + Stats<p/><pre> + +</pre> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/distance_matrix.phylip Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,8 @@ +7 + canFam2 0.0000 0.3205 0.3085 0.3193 0.3101 0.3138 0.3170 + PB1 0.3205 0.0000 0.0103 0.0100 0.0130 0.0119 0.0112 + PB2 0.3085 0.0103 0.0000 0.0033 0.0062 0.0094 0.0062 + PB3 0.3193 0.0100 0.0033 0.0000 0.0081 0.0091 0.0054 + PB4 0.3101 0.0130 0.0062 0.0081 0.0000 0.0099 0.0088 + PB6 0.3138 0.0119 0.0094 0.0091 0.0099 0.0000 0.0079 + PB8 0.3170 0.0112 0.0062 0.0054 0.0088 0.0079 0.0000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/informative_snps.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,7 @@ + canFam2 0 338 339 350 345 342 344 + PB1 338 0 338 344 338 336 339 + PB2 339 338 0 345 338 339 338 + PB3 350 344 345 0 347 342 347 + PB4 345 338 338 347 0 337 341 + PB6 342 336 339 342 337 0 343 + PB8 344 339 338 347 341 343 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/mega_distance_matrix.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,27 @@ +#mega +!Title: Galaxy; +!Format DataType=Distance DataFormat=LowerLeft NTaxa=7; + +[1] #canFam2 +[2] #PB1 +[3] #PB2 +[4] #PB3 +[5] #PB4 +[6] #PB6 +[7] #PB8 + + + +[ 1 2 3 4 5 6 7 ] +[1] +[2] 0.3205 +[3] 0.3085 0.0103 +[4] 0.3193 0.0100 0.0033 +[5] 0.3101 0.0130 0.0062 0.0081 +[6] 0.3138 0.0119 0.0094 0.0091 0.0099 +[7] 0.3170 0.0112 0.0062 0.0054 0.0088 0.0079 + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/phylogenetic_tree.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,49 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Phylogenetic tree Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 01:57:44 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="tree.pdf">tree.pdf</a></li> + <li><a href="phylogenetic_tree.newick">phylogenetic tree (newick)</a></li> + <li><a href="distance_matrix.phylip">Phylip distance matrix</a></li> + <li><a href="mega_distance_matrix.txt">Mega distance matrix</a></li> + <li><a href="informative_snps.txt">informative SNPs</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Minimum coverage: 3</li> + <li>Minimum quality: 30</li> + <li>Include reference sequence: yes</li> + <li>Data source: sequence coverage</li> + <li>Branch type: square</li> + <li>Draw branches to scale: yes</li> + <li>Show branch lengths: yes</li> + <li>Tree layout: horizontal</li> + </ul> + </div> + <div id="gd_misc"> + Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/phylogenetic_tree/phylogenetic_tree.newick Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,16 @@ +( +( +( +PB4:0.00174, +canFam2:0.30836) +:0.00188, +PB2:0.00042) +:0.00210, +( +PB6:0.00470, +PB1:0.00720) +:0.00035, +( +PB8:0.00288, +PB3:0.00252) +:0.00055);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/population_structure/graphical.pdf Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,147 @@ +%PDF-1.4 +%âãÏÓ\r +1 0 obj +<< +/CreationDate (D:20120403142055) +/ModDate (D:20120403142055) +/Title (R Graphics Output) +/Producer (R 2.11.0) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 6 0 R +/Resources 4 0 R +>> +endobj +6 0 obj +<< +/Length 7 0 R +>> +stream +1 J 1 j q +Q q +1.000 0.000 0.000 rg +74.40 74.27 54.86 0.00 re f +0.000 1.000 1.000 rg +74.40 74.27 54.86 82.69 re f +1.000 0.000 0.000 rg +140.23 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +140.23 156.96 54.86 0.00 re f +1.000 0.000 0.000 rg +206.06 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +206.06 156.96 54.86 0.00 re f +1.000 0.000 0.000 rg +271.89 74.27 54.86 0.00 re f +0.000 1.000 1.000 rg +271.89 74.27 54.86 82.69 re f +1.000 0.000 0.000 rg +337.71 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +337.71 156.96 54.86 0.00 re f +1.000 0.000 0.000 rg +403.54 74.27 54.86 82.69 re f +0.000 1.000 1.000 rg +403.54 156.96 54.86 0.00 re f +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 236.05 18.72 Tm (Individual #) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 91.68 Tm [(Ancestr) -30 (y)] TJ +ET +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +59.04 74.27 m 59.04 156.96 l S +59.04 74.27 m 51.84 74.27 l S +59.04 90.81 m 51.84 90.81 l S +59.04 107.34 m 51.84 107.34 l S +59.04 123.88 m 51.84 123.88 l S +59.04 140.42 m 51.84 140.42 l S +59.04 156.96 m 51.84 156.96 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 65.93 Tm (0.0) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 99.00 Tm (0.4) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 132.08 Tm (0.8) Tj +ET +Q +endstream +endobj +7 0 obj +1275 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +5 0 R +] +/Count 1 +/MediaBox [0 0 504 216] +>> +endobj +4 0 obj +<< +/ProcSet 
[/PDF /Text] +/Font <</F2 9 0 R >> +/ExtGState << >> +>> +endobj +8 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +9 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 8 0 R +>> endobj +xref +0 10 +0000000000 65535 f +0000000021 00000 n +0000000164 00000 n +0000001641 00000 n +0000001724 00000 n +0000000213 00000 n +0000000293 00000 n +0000001621 00000 n +0000001805 00000 n +0000002062 00000 n +trailer +<< +/Size 10 +/Info 1 0 R +/Root 2 0 R +>> +startxref +2158 +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/population_structure/numeric.txt Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,6 @@ +0.000010 0.999990 +0.999990 0.000010 +0.999990 0.000010 +0.000010 0.999990 +0.999990 0.000010 +0.999990 0.000010
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/population_structure/population_structure.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,44 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Population structure Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:20:55 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="graphical.pdf">graphical.pdf</a></li> + <li><a href="numeric.txt">numeric.txt</a></li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Number of populations: 2</li> + </ul> + </div> + <div id="gd_misc"> + Populations +<ul> +<li> +All Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> +</li> +</ul> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/prepare_population_structure/admix.map Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,303 @@ +1 snp1 0 2 +1 snp3 0 4 +1 snp4 0 5 +1 snp5 0 6 +1 snp6 0 7 +1 snp7 0 8 +1 snp8 0 9 +1 snp9 0 10 +1 snp10 0 11 +1 snp11 0 12 +1 snp12 0 13 +1 snp13 0 14 +1 snp14 0 15 +1 snp16 0 17 +1 snp17 0 18 +1 snp22 0 23 +1 snp24 0 25 +1 snp25 0 26 +1 snp27 0 28 +1 snp28 0 29 +1 snp29 0 30 +1 snp30 0 31 +1 snp31 0 32 +1 snp33 0 34 +1 snp34 0 35 +1 snp37 0 38 +1 snp38 0 39 +1 snp39 0 40 +1 snp40 0 41 +1 snp41 0 42 +1 snp42 0 43 +1 snp43 0 44 +1 snp45 0 46 +1 snp46 0 47 +1 snp47 0 48 +1 snp48 0 49 +1 snp49 0 50 +1 snp50 0 51 +1 snp51 0 52 +1 snp52 0 53 +1 snp53 0 54 +1 snp54 0 55 +1 snp56 0 57 +1 snp58 0 59 +1 snp59 0 60 +1 snp60 0 61 +1 snp61 0 62 +1 snp62 0 63 +1 snp63 0 64 +1 snp64 0 65 +1 snp65 0 66 +1 snp67 0 68 +1 snp68 0 69 +1 snp70 0 71 +1 snp71 0 72 +1 snp72 0 73 +1 snp73 0 74 +1 snp74 0 75 +1 snp75 0 76 +1 snp76 0 77 +1 snp77 0 78 +1 snp78 0 79 +1 snp80 0 81 +1 snp81 0 82 +1 snp83 0 84 +1 snp84 0 85 +1 snp87 0 88 +1 snp89 0 90 +1 snp90 0 91 +1 snp91 0 92 +1 snp92 0 93 +1 snp93 0 94 +1 snp94 0 95 +1 snp98 0 99 +1 snp100 0 101 +1 snp101 0 102 +1 snp102 0 103 +1 snp103 0 104 +1 snp104 0 105 +1 snp105 0 106 +1 snp106 0 107 +1 snp107 0 108 +1 snp108 0 109 +1 snp110 0 111 +1 snp111 0 112 +1 snp112 0 113 +1 snp113 0 114 +1 snp116 0 117 +1 snp117 0 118 +1 snp118 0 119 +1 snp119 0 120 +1 snp121 0 122 +1 snp122 0 123 +1 snp123 0 124 +1 snp124 0 125 +1 snp125 0 126 +1 snp126 0 127 +1 snp128 0 129 +1 snp129 0 130 +1 snp131 0 132 +1 snp133 0 134 +1 snp134 0 135 +1 snp135 0 136 +1 snp137 0 138 +1 snp138 0 139 +1 snp139 0 140 +1 snp140 0 141 +1 snp141 0 142 +1 snp143 0 144 +1 snp145 0 146 +1 snp146 0 147 +1 snp148 0 149 +1 snp149 0 150 +1 snp150 0 151 +1 snp151 0 152 +1 snp152 0 153 +1 snp153 0 154 +1 snp154 0 155 +1 snp156 0 157 +1 snp157 0 158 +1 snp158 0 159 +1 snp159 0 160 +1 snp160 0 161 +1 snp161 0 162 +1 snp162 0 163 +1 
snp164 0 165 +1 snp165 0 166 +1 snp167 0 168 +1 snp168 0 169 +1 snp169 0 170 +1 snp170 0 171 +1 snp171 0 172 +1 snp172 0 173 +1 snp174 0 175 +1 snp175 0 176 +1 snp176 0 177 +1 snp177 0 178 +1 snp178 0 179 +1 snp179 0 180 +1 snp181 0 182 +1 snp182 0 183 +1 snp183 0 184 +1 snp184 0 185 +1 snp185 0 186 +1 snp186 0 187 +1 snp188 0 189 +1 snp191 0 192 +1 snp192 0 193 +1 snp193 0 194 +1 snp195 0 196 +1 snp196 0 197 +1 snp197 0 198 +1 snp199 0 200 +1 snp200 0 201 +1 snp201 0 202 +1 snp202 0 203 +1 snp203 0 204 +1 snp205 0 206 +1 snp207 0 208 +1 snp210 0 211 +1 snp211 0 212 +1 snp212 0 213 +1 snp213 0 214 +1 snp214 0 215 +1 snp215 0 216 +1 snp216 0 217 +1 snp217 0 218 +1 snp218 0 219 +1 snp219 0 220 +1 snp220 0 221 +1 snp221 0 222 +1 snp223 0 224 +1 snp224 0 225 +1 snp225 0 226 +1 snp226 0 227 +1 snp227 0 228 +1 snp228 0 229 +1 snp229 0 230 +1 snp230 0 231 +1 snp231 0 232 +1 snp232 0 233 +1 snp235 0 236 +1 snp236 0 237 +1 snp237 0 238 +1 snp239 0 240 +1 snp240 0 241 +1 snp241 0 242 +1 snp242 0 243 +1 snp243 0 244 +1 snp244 0 245 +1 snp246 0 247 +1 snp247 0 248 +1 snp248 0 249 +1 snp249 0 250 +1 snp250 0 251 +1 snp251 0 252 +1 snp252 0 253 +1 snp253 0 254 +1 snp254 0 255 +1 snp255 0 256 +1 snp256 0 257 +1 snp257 0 258 +1 snp258 0 259 +1 snp260 0 261 +1 snp261 0 262 +1 snp262 0 263 +1 snp263 0 264 +1 snp264 0 265 +1 snp265 0 266 +1 snp266 0 267 +1 snp267 0 268 +1 snp268 0 269 +1 snp269 0 270 +1 snp270 0 271 +1 snp271 0 272 +1 snp273 0 274 +1 snp274 0 275 +1 snp275 0 276 +1 snp276 0 277 +1 snp277 0 278 +1 snp278 0 279 +1 snp281 0 282 +1 snp282 0 283 +1 snp284 0 285 +1 snp287 0 288 +1 snp288 0 289 +1 snp289 0 290 +1 snp290 0 291 +1 snp291 0 292 +1 snp292 0 293 +1 snp293 0 294 +1 snp294 0 295 +1 snp297 0 298 +1 snp298 0 299 +1 snp299 0 300 +1 snp300 0 301 +1 snp301 0 302 +1 snp302 0 303 +1 snp303 0 304 +1 snp304 0 305 +1 snp307 0 308 +1 snp308 0 309 +1 snp309 0 310 +1 snp310 0 311 +1 snp312 0 313 +1 snp313 0 314 +1 snp316 0 317 +1 snp317 0 318 +1 snp320 0 321 +1 snp321 0 322 +1 
snp322 0 323 +1 snp323 0 324 +1 snp324 0 325 +1 snp325 0 326 +1 snp328 0 329 +1 snp329 0 330 +1 snp331 0 332 +1 snp332 0 333 +1 snp333 0 334 +1 snp334 0 335 +1 snp335 0 336 +1 snp336 0 337 +1 snp338 0 339 +1 snp339 0 340 +1 snp341 0 342 +1 snp342 0 343 +1 snp344 0 345 +1 snp345 0 346 +1 snp348 0 349 +1 snp350 0 351 +1 snp352 0 353 +1 snp353 0 354 +1 snp354 0 355 +1 snp355 0 356 +1 snp360 0 361 +1 snp361 0 362 +1 snp362 0 363 +1 snp364 0 365 +1 snp366 0 367 +1 snp369 0 370 +1 snp370 0 371 +1 snp371 0 372 +1 snp372 0 373 +1 snp373 0 374 +1 snp374 0 375 +1 snp375 0 376 +1 snp376 0 377 +1 snp377 0 378 +1 snp378 0 379 +1 snp379 0 380 +1 snp380 0 381 +1 snp381 0 382 +1 snp382 0 383 +1 snp383 0 384 +1 snp384 0 385 +1 snp385 0 386 +1 snp386 0 387 +1 snp389 0 390 +1 snp390 0 391 +1 snp393 0 394 +1 snp395 0 396 +1 snp397 0 398 +1 snp400 0 401
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/prepare_population_structure/admix.ped Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,6 @@ +PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB2 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB3 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB4 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +PB6 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 +PB8 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/prepare_population_structure/prepare_population_structure.html Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,47 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-type" content="text/html; charset=UTF-8" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + <title>Prepare to look for population structure Galaxy Composite Dataset</title> + </head> + <body> + <div class="document"> + Output completed: 2012-04-03 02:17:44 PM + <p/> + <div id="gd_outputs"> + Outputs + <ul> + <li><a href="admix.ped">admix.ped</a></li> + <li><a href="admix.map">admix.map</a></li> + <li>Using 303 of 400 SNPs</li> + </ul> + </div> + <div id="gd_inputs"> + Inputs + <ul> + <li>Minimum reads covering a SNP, per individual: 3</li> + <li>Minimum quality value, per individual: 30</li> + <li>Minimum spacing between SNPs on the same scaffold: 0</li> + </ul> + </div> + <div id="gd_misc"> + Populations +<ul> +<li> +All Individuals +<ol> +<li>PB1</li> +<li>PB2</li> +<li>PB3</li> +<li>PB4</li> +<li>PB6</li> +<li>PB8</li> +</ol> +</li> +</ul> + </div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out/rank_pathways/rank_pathways.tabular Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,240 @@ +3 0.25 1 cfa03450=Non-homologous end-joining +1 0.25 1 cfa00750=Vitamin B6 metabolism +2 0.2 3 cfa00290=Valine, leucine and isoleucine biosynthesis +3 0.18 4 cfa00770=Pantothenate and CoA biosynthesis +5 0.17 5 cfa05310=Asthma +4 0.16 6 cfa00760=Nicotinate and nicotinamide metabolism +2 0.12 7 cfa00450=Selenocompound metabolism +4 0.11 8 cfa05330=Allograft rejection +5 0.098 9 cfa04672=Intestinal immune network for IgA production +4 0.098 9 cfa02010=ABC transporters +2 0.095 11 cfa03430=Mismatch repair +4 0.089 12 cfa05320=Autoimmune thyroid disease +4 0.089 12 cfa00280=Valine, leucine and isoleucine degradation +3 0.088 14 cfa03410=Base excision repair +3 0.088 14 cfa03030=DNA replication +3 0.088 14 cfa00565=Ether lipid metabolism +6 0.087 17 cfa05140=Leishmaniasis +2 0.087 17 cfa04977=Vitamin digestion and absorption +1 0.083 19 cfa00740=Riboflavin metabolism +4 0.08 20 cfa05150=Staphylococcus aureus infection +2 0.08 20 cfa03060=Protein export +3 0.079 22 cfa05340=Primary immunodeficiency +3 0.079 22 cfa05143=African trypanosomiasis +6 0.078 24 cfa00564=Glycerophospholipid metabolism +2 0.077 25 cfa00410=beta-Alanine metabolism +2 0.071 26 cfa05332=Graft-versus-host disease +5 0.069 27 cfa03320=PPAR signaling pathway +6 0.066 28 cfa05323=Rheumatoid arthritis +5 0.063 29 cfa04664=Fc epsilon RI signaling pathway +3 0.062 30 cfa00561=Glycerolipid metabolism +2 0.062 30 cfa00350=Tyrosine metabolism +2 0.062 30 cfa00020=Citrate cycle (TCA cycle) +2 0.059 33 cfa00260=Glycine, serine and threonine metabolism +1 0.059 33 cfa04614=Renin-angiotensin system +1 0.059 33 cfa00360=Phenylalanine metabolism +9 0.058 36 cfa04145=Phagosome +3 0.058 36 cfa05213=Endometrial cancer +4 0.057 38 cfa05416=Viral myocarditis +2 0.057 38 cfa00500=Starch and sucrose metabolism +2 0.056 40 cfa04130=SNARE interactions in vesicular 
transport +1 0.056 40 cfa00592=alpha-Linolenic acid metabolism +1 0.053 42 cfa04964=Proximal tubule bicarbonate reclamation +1 0.053 42 cfa00630=Glyoxylate and dicarboxylate metabolism +3 0.052 44 cfa04621=NOD-like receptor signaling pathway +2 0.05 45 cfa05219=Bladder cancer +2 0.05 45 cfa04940=Type I diabetes mellitus +2 0.05 45 cfa00380=Tryptophan metabolism +2 0.047 48 cfa03420=Nucleotide excision repair +3 0.045 49 cfa04920=Adipocytokine signaling pathway +3 0.045 49 cfa00970=Aminoacyl-tRNA biosynthesis +2 0.045 49 cfa00071=Fatty acid metabolism +1 0.045 49 cfa00591=Linoleic acid metabolism +1 0.045 49 cfa00340=Histidine metabolism +4 0.043 54 cfa04972=Pancreatic secretion +2 0.043 54 cfa03022=Basal transcription factors +2 0.043 54 cfa00982=Drug metabolism - cytochrome P450 +3 0.042 57 cfa05218=Melanoma +3 0.042 57 cfa05211=Renal cell carcinoma +4 0.041 59 cfa05414=Dilated cardiomyopathy +2 0.04 60 cfa00590=Arachidonic acid metabolism +1 0.04 60 cfa04320=Dorso-ventral axis formation +3 0.039 62 cfa04662=B cell receptor signaling pathway +2 0.039 62 cfa00310=Lysine degradation +3 0.038 64 cfa04512=ECM-receptor interaction +2 0.038 64 cfa05144=Malaria +2 0.038 64 cfa00270=Cysteine and methionine metabolism +1 0.038 64 cfa03440=Homologous recombination +1 0.038 64 cfa00052=Galactose metabolism +8 0.037 69 cfa04810=Regulation of actin cytoskeleton +4 0.037 69 cfa05146=Amoebiasis +4 0.037 69 cfa04666=Fc gamma R-mediated phagocytosis +2 0.037 69 cfa05223=Non-small cell lung cancer +6 0.036 73 cfa05168=Herpes simplex infection +6 0.036 73 cfa05152=Tuberculosis +3 0.036 73 cfa04640=Hematopoietic cell lineage +7 0.034 76 cfa04510=Focal adhesion +3 0.034 76 cfa00240=Pyrimidine metabolism +3 0.033 78 cfa03008=Ribosome biogenesis in eukaryotes +1 0.033 78 cfa00983=Drug metabolism - other enzymes +2 0.032 80 cfa04976=Bile secretion +6 0.031 81 cfa04060=Cytokine-cytokine receptor interaction +4 0.031 81 cfa04110=Cell cycle +1 0.031 81 cfa00250=Alanine, aspartate and 
glutamate metabolism +4 0.029 84 cfa05145=Toxoplasmosis +3 0.029 84 cfa04650=Natural killer cell mediated cytotoxicity +2 0.029 84 cfa05214=Glioma +4 0.028 87 cfa05162=Measles +2 0.028 87 cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC) +7 0.027 89 cfa05166=HTLV-I infection +4 0.027 89 cfa05322=Systemic lupus erythematosus +2 0.027 89 cfa05212=Pancreatic cancer +2 0.026 92 cfa04146=Peroxisome +2 0.026 92 cfa04070=Phosphatidylinositol signaling system +1 0.026 92 cfa04978=Mineral absorption +2 0.025 95 cfa05133=Pertussis +2 0.025 95 cfa04612=Antigen processing and presentation +2 0.025 95 cfa04350=TGF-beta signaling pathway +1 0.025 95 cfa00830=Retinol metabolism +3 0.024 99 cfa04514=Cell adhesion molecules (CAMs) +2 0.024 99 cfa05410=Hypertrophic cardiomyopathy (HCM) +2 0.024 99 cfa04012=ErbB signaling pathway +1 0.024 99 cfa00980=Metabolism of xenobiotics by cytochrome P450 +1 0.024 99 cfa00640=Propanoate metabolism +3 0.023 104 cfa04360=Axon guidance +2 0.023 104 cfa04620=Toll-like receptor signaling pathway +1 0.023 104 cfa04975=Fat digestion and absorption +1 0.023 104 cfa04330=Notch signaling pathway +7 0.022 108 cfa05200=Pathways in cancer +3 0.022 108 cfa04910=Insulin signaling pathway +2 0.022 108 cfa05215=Prostate cancer +1 0.022 108 cfa03460=Fanconi anemia pathway +24 0.021 112 cfa01100=Metabolic pathways +3 0.021 112 cfa04630=Jak-STAT signaling pathway +1 0.021 112 cfa00480=Glutathione metabolism +3 0.020 115 cfa00230=Purine metabolism +2 0.020 115 cfa04540=Gap junction +1 0.02 115 cfa00620=Pyruvate metabolism +2 0.019 118 cfa04912=GnRH signaling pathway +2 0.018 119 cfa05142=Chagas disease (American trypanosomiasis) +2 0.018 119 cfa04380=Osteoclast differentiation +1 0.018 119 cfa05221=Acute myeloid leukemia +1 0.018 119 cfa00330=Arginine and proline metabolism +3 0.017 123 cfa05164=Influenza A +2 0.017 123 cfa04270=Vascular smooth muscle contraction +2 0.017 123 cfa04114=Oocyte meiosis +3 0.016 126 cfa04141=Protein processing in 
endoplasmic reticulum +3 0.016 126 cfa04020=Calcium signaling pathway +2 0.016 126 cfa05160=Hepatitis C +2 0.016 126 cfa04670=Leukocyte transendothelial migration +1 0.016 126 cfa05210=Colorectal cancer +1 0.016 126 cfa04610=Complement and coagulation cascades +1 0.016 126 cfa04150=mTOR signaling pathway +4 0.015 133 cfa04010=MAPK signaling pathway +1 0.015 133 cfa04974=Protein digestion and absorption +1 0.015 133 cfa04730=Long-term depression +1 0.015 133 cfa04115=p53 signaling pathway +1 0.014 137 cfa05220=Chronic myeloid leukemia +1 0.014 137 cfa04971=Gastric acid secretion +1 0.014 137 cfa04720=Long-term potentiation +1 0.014 137 cfa04370=VEGF signaling pathway +1 0.014 137 cfa04260=Cardiac muscle contraction +1 0.014 137 cfa03018=RNA degradation +2 0.013 143 cfa00010=Glycolysis / Gluconeogenesis +1 0.013 143 cfa04970=Salivary secretion +1 0.013 143 cfa04520=Adherens junction +2 0.012 146 cfa04062=Chemokine signaling pathway +1 0.012 146 cfa05134=Legionellosis +1 0.012 146 cfa05132=Salmonella infection +1 0.012 146 cfa04727=GABAergic synapse +1 0.012 146 cfa04210=Apoptosis +1 0.011 151 cfa03015=mRNA surveillance pathway +1 0.010 152 cfa04914=Progesterone-mediated oocyte maturation +1 0.0098 153 cfa04916=Melanogenesis +2 0.0095 154 cfa04144=Endocytosis +1 0.0087 155 cfa04142=Lysosome +1 0.0086 156 cfa04660=T cell receptor signaling pathway +1 0.0082 157 cfa04724=Glutamatergic synapse +2 0.0081 158 cfa04080=Neuroactive ligand-receptor interaction +1 0.0079 159 cfa04728=Dopaminergic synapse +2 0.0074 160 cfa05010=Alzheimer's disease +1 0.0074 160 cfa04722=Neurotrophin signaling pathway +1 0.0074 160 cfa04120=Ubiquitin mediated proteolysis +1 0.0068 163 cfa00190=Oxidative phosphorylation +1 0.0067 164 cfa05012=Parkinson's disease +1 0.0057 165 cfa03013=RNA transport +1 0.0056 166 cfa03040=Spliceosome +1 0.0049 167 cfa05016=Huntington's disease +1 0.0023 168 cfa04740=Olfactory transduction +0 0 169 cfa05222=Small cell lung cancer +0 0 169 cfa05217=Basal cell 
carcinoma +0 0 169 cfa05216=Thyroid cancer +0 0 169 cfa05100=Bacterial invasion of epithelial cells +0 0 169 cfa05020=Prion diseases +0 0 169 cfa05014=Amyotrophic lateral sclerosis (ALS) +0 0 169 cfa04973=Carbohydrate digestion and absorption +0 0 169 cfa04966=Collecting duct acid secretion +0 0 169 cfa04962=Vasopressin-regulated water reabsorption +0 0 169 cfa04961=Endocrine and other factor-regulated calcium reabsorption +0 0 169 cfa04960=Aldosterone-regulated sodium reabsorption +0 0 169 cfa04950=Maturity onset diabetes of the young +0 0 169 cfa04930=Type II diabetes mellitus +0 0 169 cfa04744=Phototransduction +0 0 169 cfa04742=Taste transduction +0 0 169 cfa04725=Cholinergic synapse +0 0 169 cfa04721=Synaptic vesicle cycle +0 0 169 cfa04710=Circadian rhythm - mammal +0 0 169 cfa04623=Cytosolic DNA-sensing pathway +0 0 169 cfa04622=RIG-I-like receptor signaling pathway +0 0 169 cfa04530=Tight junction +0 0 169 cfa04340=Hedgehog signaling pathway +0 0 169 cfa04310=Wnt signaling pathway +0 0 169 cfa04140=Regulation of autophagy +0 0 169 cfa04122=Sulfur relay system +0 0 169 cfa03050=Proteasome +0 0 169 cfa03020=RNA polymerase +0 0 169 cfa03010=Ribosome +0 0 169 cfa01040=Biosynthesis of unsaturated fatty acids +0 0 169 cfa00920=Sulfur metabolism +0 0 169 cfa00910=Nitrogen metabolism +0 0 169 cfa00900=Terpenoid backbone biosynthesis +0 0 169 cfa00860=Porphyrin and chlorophyll metabolism +0 0 169 cfa00790=Folate biosynthesis +0 0 169 cfa00785=Lipoic acid metabolism +0 0 169 cfa00780=Biotin metabolism +0 0 169 cfa00730=Thiamine metabolism +0 0 169 cfa00670=One carbon pool by folate +0 0 169 cfa00650=Butanoate metabolism +0 0 169 cfa00604=Glycosphingolipid biosynthesis - ganglio series +0 0 169 cfa00603=Glycosphingolipid biosynthesis - globo series +0 0 169 cfa00601=Glycosphingolipid biosynthesis - lacto and neolacto series +0 0 169 cfa00600=Sphingolipid metabolism +0 0 169 cfa00563=Glycosylphosphatidylinositol(GPI)-anchor biosynthesis +0 0 169 cfa00562=Inositol 
phosphate metabolism +0 0 169 cfa00534=Glycosaminoglycan biosynthesis - heparan sulfate +0 0 169 cfa00533=Glycosaminoglycan biosynthesis - keratan sulfate +0 0 169 cfa00532=Glycosaminoglycan biosynthesis - chondroitin sulfate +0 0 169 cfa00531=Glycosaminoglycan degradation +0 0 169 cfa00520=Amino sugar and nucleotide sugar metabolism +0 0 169 cfa00514=Other types of O-glycan biosynthesis +0 0 169 cfa00512=Mucin type O-Glycan biosynthesis +0 0 169 cfa00511=Other glycan degradation +0 0 169 cfa00510=N-Glycan biosynthesis +0 0 169 cfa00472=D-Arginine and D-ornithine metabolism +0 0 169 cfa00471=D-Glutamine and D-glutamate metabolism +0 0 169 cfa00460=Cyanoamino acid metabolism +0 0 169 cfa00430=Taurine and hypotaurine metabolism +0 0 169 cfa00400=Phenylalanine, tyrosine and tryptophan biosynthesis +0 0 169 cfa00300=Lysine biosynthesis +0 0 169 cfa00232=Caffeine metabolism +0 0 169 cfa00140=Steroid hormone biosynthesis +0 0 169 cfa00130=Ubiquinone and other terpenoid-quinone biosynthesis +0 0 169 cfa00120=Primary bile acid biosynthesis +0 0 169 cfa00100=Steroid biosynthesis +0 0 169 cfa00072=Synthesis and degradation of ketone bodies +0 0 169 cfa00062=Fatty acid elongation in mitochondria +0 0 169 cfa00061=Fatty acid biosynthesis +0 0 169 cfa00053=Ascorbate and aldarate metabolism +0 0 169 cfa00051=Fructose and mannose metabolism +0 0 169 cfa00040=Pentose and glucuronate interconversions +0 0 169 cfa00030=Pentose phosphate pathway
--- a/test-data/test_out/select_snps/select_snps.gd_snp Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,102 +0,0 @@ -#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", -#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} -Contig161_chr1_4641264_4641879 115 C T 73.5 chr1 4641382 C 6 0 2 45 8 0 2 51 15 0 2 72 5 0 2 42 6 0 2 45 10 0 2 57 Y 54 0.323 0 -Contig86_chr1_30984450_30985684 670 C T 365.0 chr1 30985133 C 9 0 2 54 10 0 2 57 13 0 2 66 3 0 2 36 9 0 2 54 7 0 2 48 Y 145 0.031 0 -Contig21_chr1_60697952_60699446 307 G A 51.9 chr1 60698265 G 12 0 2 63 9 0 2 54 4 0 2 39 6 0 2 45 9 0 2 54 4 0 2 39 Y 98 0.507 0 -Contig64_chr1_87343284_87345672 163 T A 3.76 chr1 87343443 C 0 2 2 1 0 0 -1 0 5 0 2 42 2 0 2 33 0 1 2 14 0 0 -1 0 N 3 0.039 2 -Contig20_chr1_110679280_110679687 181 C T 87.4 chr1 110679454 - 1 0 2 30 7 0 2 48 4 0 2 39 2 0 2 33 2 0 2 33 0 0 -1 0 N 31 0.660 2 -Contig222_chr2_9817738_9818143 220 C T 888.0 chr2 9817960 C 17 0 2 78 12 0 2 63 20 0 2 87 8 0 2 51 11 0 2 60 12 0 2 63 Y 76 0.093 1 -Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 -Contig6_chr2_56859179_56859956 671 T C 999.9 chr2 56859851 T 15 0 2 72 18 0 2 81 20 0 2 90 19 0 2 84 19 0 2 84 24 0 2 99 N 28 5.308 1 -Contig163_chr2_76402959_76404830 221 C T 127.0 chr2 76403181 C 4 0 2 42 10 0 2 57 9 0 2 54 11 0 2 60 7 0 2 48 9 0 2 54 Y 54 0.178 1 -Contig56_chr3_17326225_17327548 387 G C 91.2 chr3 17326591 G 14 0 2 69 13 0 2 66 15 0 2 72 15 0 2 72 13 0 2 66 12 0 2 63 Y 20 0.225 3 -Contig108_chr3_46210055_46210874 367 A G 21.0 chr3 46210423 A 19 0 2 84 10 0 2 57 16 0 2 75 14 0 2 69 20 0 2 87 11 0 2 60 N 236 0.028 1 
-Contig1_chr3_51588422_51589409 926 A G 51.0 chr3 51589353 G 2 0 2 33 2 0 2 33 6 0 2 45 4 0 2 39 9 0 2 54 11 0 2 60 N 21 1.147 0 -Contig65_chr3_80727952_80728283 39 T C 71.2 chr3 80727990 T 7 0 2 48 3 0 2 36 8 0 2 51 6 0 2 45 8 0 2 51 11 0 2 60 N 22 7.078 0 -Contig134_chr4_12145648_12148225 1326 C T 164.0 chr4 12146961 C 9 0 2 54 8 0 2 51 7 0 2 48 3 0 2 36 5 0 2 42 5 0 2 42 Y 4 0.080 1 -Contig19_chr4_26233601_26233991 146 G C 51.6 chr4 26233744 G 10 0 2 57 8 0 2 51 9 0 2 54 5 0 2 42 9 0 2 54 4 0 2 39 N 41 0.163 3 -Contig17_chr4_61310346_61311158 267 C T 49.9 chr4 61310604 T 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 14 0 2 69 7 0 2 48 Y 219 0.098 0 -Contig31_chr5_4734956_4736547 1166 C T 133.0 chr5 4736132 C 14 0 2 69 8 0 2 51 17 0 2 78 4 0 2 39 9 0 2 54 12 0 2 63 Y 1 0.021 0 -Contig6_chr5_26899813_26900498 97 A C 88.6 chr5 26899910 A 15 0 2 72 14 0 2 69 27 0 2 108 15 0 2 72 13 0 2 69 12 0 2 63 Y 92 7.370 3 -Contig45_chr5_50892738_50892968 169 C A 25.8 chr5 50892911 C 10 0 2 57 7 0 2 48 10 0 2 60 6 0 2 45 6 0 2 45 13 0 2 66 N 244 0.497 1 -Contig45_chr5_76133561_76134403 388 A G 103.0 chr5 76133941 G 3 0 2 36 8 0 2 51 8 0 2 51 5 0 2 42 6 0 2 45 7 0 2 48 Y 57 0.038 0 -Contig111_chr6_5821219_5822519 1060 A G 68.1 chr6 5822321 T 7 0 2 48 6 0 2 45 11 0 2 60 9 0 2 54 3 0 2 36 12 0 2 63 Y 7 0.231 1 -Contig102_chr6_30271329_30271577 39 T G 139.0 chr6 30271371 G 3 0 2 36 4 0 2 39 6 0 2 45 1 0 2 30 4 0 2 39 4 0 2 39 N 15 1.159 0 -Contig112_chr6_51024554_51024851 100 A G 121.0 chr6 51024654 A 10 0 2 57 12 0 2 63 9 0 2 54 13 0 2 66 14 0 2 69 17 0 2 78 N 75 4.287 0 -Contig84_chr7_6648683_6650255 1297 G A 110.0 chr7 6649988 G 18 0 2 81 9 0 2 54 22 0 2 77 16 0 2 75 20 0 2 87 6 0 2 45 Y 83 0.166 0 -Contig206_chr7_26281823_26282074 103 C A 101.0 chr7 26281925 T 11 0 2 60 16 0 2 61 19 0 2 84 6 0 2 45 19 0 2 84 16 0 2 75 N -1 0.947 1 -Contig38_chr7_50681997_50682600 42 T C 92.4 chr7 50682037 G 6 0 2 45 2 0 2 33 10 0 2 57 12 0 2 63 5 0 2 42 6 0 2 45 Y 94 0.146 0 
-Contig91_chr8_12804505_12805470 409 C A 111.0 chr8 12804906 C 8 0 2 51 10 0 2 57 15 0 2 72 12 0 2 63 14 0 2 69 15 0 2 72 N 145 0.175 0 -Contig8_chr8_27811135_27812620 333 C T 37.9 chr8 27811458 C 4 0 2 39 11 0 2 60 18 0 2 81 5 0 2 42 6 0 2 45 5 0 2 42 Y 1 0.272 0 -Contig17_chr8_57490059_57490498 69 G T 97.4 chr8 57490127 A 2 0 2 33 11 0 2 60 15 0 2 72 16 0 2 75 8 0 2 51 10 0 2 57 N 40 0.522 5 -Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 0.448 4 -Contig96_chr9_39008495_39009278 215 A C 98.7 chr9 39008708 C 7 0 2 48 13 0 2 66 28 0 2 111 16 0 2 75 17 0 2 78 17 0 2 78 Y 8 0.427 1 -Contig22_chr10_15505382_15505589 172 T C 38.5 chr10 15505548 T 2 0 2 33 6 0 2 45 8 0 2 51 8 0 2 51 9 0 2 54 12 0 2 63 N 284 2.861 0 -Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 -Contig9_chr10_51475063_51476054 770 C T 57.3 chr10 51475839 C 6 0 2 45 16 0 2 75 16 0 2 75 13 0 2 66 9 0 2 54 9 2 2 21 N 80 0.394 0 -Contig72_chr11_7142765_7143772 146 G A 152.0 chr11 7142911 A 8 0 2 51 8 0 2 51 24 0 2 99 10 0 2 57 17 0 2 78 11 0 2 60 Y 90 1.137 0 -Contig7_chr11_40017076_40017630 352 C T 46.3 chr11 40017422 C 7 0 2 48 9 0 2 54 6 0 2 45 8 0 2 51 16 0 2 75 9 0 2 54 Y 44 0.336 0 -Contig16_chr11_53408448_53408790 187 A G 153.0 chr11 53408638 A 7 0 2 48 9 0 2 54 18 0 2 81 10 0 2 57 11 0 2 60 12 0 2 63 Y 116 1.367 0 -Contig21_chr12_18403415_18404381 586 G T 34.5 chr12 18403983 - 13 0 2 66 16 0 2 75 25 0 2 102 12 0 2 63 12 0 2 63 14 0 2 69 Y 12 0.068 0 -Contig41_chr12_25565452_25566993 475 G T 6.29 chr12 25565926 G 15 0 2 72 14 0 2 69 10 0 2 57 15 0 2 72 18 0 2 81 19 0 2 84 N 10 2.231 1 -Contig5_chr12_53880670_53882675 1221 A C 99.4 chr12 53881888 A 16 0 2 75 18 0 2 81 23 0 2 96 10 0 2 57 15 0 2 72 17 0 2 78 Y 31 0.061 0 -Contig107_chr13_26045881_26046290 341 C G 81.4 chr13 26046230 C 16 0 2 75 20 0 2 90 14 0 2 69 15 0 2 72 9 0 2 54 9 0 2 54 Y 51 
4.510 0 -Contig251_chr13_28498333_28501066 864 T G 296.0 chr13 28499180 T 3 0 2 36 5 0 2 42 4 0 2 39 2 0 2 33 5 0 2 42 6 0 2 45 Y 9 0.068 0 -Contig55_chr13_53467708_53468101 221 T G 132.0 chr13 53467925 T 25 0 2 102 12 0 2 63 26 0 2 105 7 0 2 48 16 0 2 75 16 0 2 75 N 20 5.717 1 -Contig48_chr14_11839435_11843272 3014 A G 163.0 chr14 11842446 A 10 0 2 57 8 0 2 51 13 0 2 66 10 0 2 57 5 0 2 42 10 0 2 57 Y 31 0.908 0 -Contig28_chr14_26905747_26909514 975 G C 3.13 chr14 26906723 G 16 0 2 75 10 0 2 57 12 0 2 63 15 0 2 72 10 0 2 57 7 0 2 48 N 287 0.117 2 -Contig64_chr14_56768376_56768902 473 C T 29.0 chr14 56768832 C 15 0 2 72 11 0 2 60 14 0 2 69 14 0 2 69 7 0 2 48 9 0 2 54 Y 91 8.281 0 -Contig60_chr15_18493036_18494316 150 G A 92.6 chr15 18493188 G 9 0 2 54 13 0 2 66 9 0 2 54 6 0 2 45 5 0 2 42 12 0 2 63 Y 45 0.125 0 -Contig112_chr15_26772864_26773267 374 C T 21.6 chr15 26773244 C 4 0 2 39 4 0 2 39 5 0 2 42 2 0 2 33 4 0 2 39 3 0 2 36 N 18 +99. 0 -Contig119_chr16_6160274_6160477 180 G A 54.8 chr16 6160457 G 7 0 2 48 6 0 2 45 12 0 2 63 3 0 2 36 11 0 2 60 10 0 2 57 N 42 +99. 
0 -Contig60_chr16_28079136_28080263 588 T G 157.0 chr16 28079739 T 22 0 2 93 20 0 2 87 22 0 2 93 17 0 2 78 12 0 2 63 10 0 2 57 Y 105 5.999 1 -Contig31_chr17_12128267_12129637 205 G A 90.5 chr17 12128484 G 7 0 2 48 6 0 2 45 6 0 2 45 11 0 2 60 7 0 2 48 4 0 2 39 Y 10 0.246 0 -Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 -Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 -Contig229_chr18_3706523_3708577 1076 A G 83.9 chr18 3707630 A 11 0 2 60 13 0 2 66 26 0 2 105 11 0 2 60 15 0 2 72 17 0 2 78 Y 63 0.445 0 -Contig82_chr18_27305489_27306229 566 C T 49.5 chr18 27306051 A 6 0 2 45 6 0 2 45 10 0 2 57 11 0 2 60 6 0 2 45 7 0 2 48 N 1 0.349 0 -Contig64_chr18_55979770_55980315 49 G A 89.1 chr18 55979824 G 3 0 2 36 9 0 2 54 7 0 2 51 4 0 2 39 3 0 2 36 3 0 2 36 Y -1 2.124 0 -Contig146_chr19_5221790_5223013 143 A G 114.0 chr19 5221916 - 1 0 2 30 4 0 2 39 3 0 2 36 5 0 2 42 2 0 2 33 5 0 2 42 Y 12 0.870 0 -Contig129_chr19_25541958_25542221 202 T C 68.1 chr19 25542154 C 11 0 2 60 19 0 2 84 10 0 2 60 17 0 2 78 9 0 2 54 12 0 2 63 N -1 2.551 1 -Contig60_chr19_54013816_54014398 281 A G 138.0 chr19 54014103 C 6 0 2 45 15 0 2 72 7 0 2 48 10 0 2 57 15 0 2 72 10 0 2 57 Y 188 1.271 0 -Contig50_chr20_12138509_12141975 3206 C A 248.0 chr20 12141763 C 8 0 2 51 15 0 2 72 14 0 2 69 6 0 2 45 10 0 2 57 7 0 2 48 Y 2 0.384 0 -Contig36_chr20_32631363_32632049 176 G A 24.1 chr20 32631526 G 7 0 2 48 14 0 2 69 19 0 2 84 14 0 2 69 15 0 2 72 16 0 2 75 N 50 1.150 0 -Contig50_chr21_4178523_4178687 121 G A 362.0 chr21 4178640 G 8 0 2 51 14 0 2 69 5 0 2 42 3 0 2 36 11 0 2 60 4 0 2 39 N 392 0.483 0 -Contig129_chr21_31045749_31046924 381 A G 129.0 chr21 31046141 A 19 0 2 84 8 0 2 51 23 0 2 96 12 0 2 63 15 0 2 72 18 0 2 81 Y 69 0.028 2 -Contig159_chr22_7896450_7896974 109 G C 151.0 chr22 7896570 G 16 0 2 75 5 7 1 62 14 0 2 69 16 0 2 75 13 0 2 66 13 
0 2 66 Y 16 0.465 0 -Contig23_chr22_34612023_34612568 167 C G 92.3 chr22 34612181 C 11 0 2 60 18 0 2 81 13 0 2 66 8 0 2 51 12 0 2 63 14 0 2 69 Y 7 0.409 0 -Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 -Contig133_chr23_3525134_3526502 1223 A G 201.0 chr23 3526387 A 11 0 2 60 13 0 2 66 23 0 2 96 21 0 2 90 13 0 2 66 10 0 2 57 Y 61 1.359 0 -Contig35_chr23_28447813_28449115 70 T A 21.3 chr23 28447881 T 9 0 2 54 8 0 2 51 10 0 2 57 9 0 2 54 10 0 2 57 12 0 2 63 N 251 0.163 1 -Contig50_chr24_22515247_22516072 761 C T 243.0 chr24 22515981 T 11 0 2 60 10 0 2 57 8 0 2 51 9 0 2 54 18 0 2 81 8 0 2 51 Y 1 0.190 0 -Contig84_chr24_29196623_29199644 466 C T 126.0 chr24 29197091 T 7 0 2 48 11 0 2 60 8 0 2 51 7 0 2 48 11 0 2 60 15 0 2 72 Y 42 0.215 0 -Contig144_chr25_4011170_4013134 541 A G 160.0 chr25 4011690 A 12 0 2 63 17 0 2 78 13 0 2 66 13 0 2 66 13 0 2 66 13 0 2 66 Y 5 0.087 0 -Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 -Contig204_chr26_4311195_4311778 170 C T 16.9 chr26 4311363 T 20 0 2 87 8 0 2 51 13 0 2 66 18 0 2 81 11 0 2 60 14 0 2 69 N 35 0.085 0 -Contig146_chr26_26622638_26623906 574 G A 186.0 chr26 26623219 A 11 0 2 60 12 0 2 63 9 0 2 54 11 0 2 60 9 0 2 54 12 0 2 63 Y 1 0.318 0 -Contig135_chr27_6853874_6854079 158 C T 116.0 chr27 6854032 T 18 0 2 81 19 0 2 84 13 0 2 66 7 0 2 48 8 0 2 51 11 0 2 60 N 4 0.060 1 -Contig64_chr27_34654435_34654621 132 C A 115.0 chr27 34654567 T 2 0 2 33 2 0 2 33 5 0 2 42 3 0 2 36 3 0 2 36 8 0 2 51 N 12 0.297 1 -Contig131_chr28_6481806_6483783 138 C T 36.2 chr28 6481953 C 12 0 2 63 12 0 2 63 20 0 2 87 11 0 2 60 10 0 2 57 12 0 2 63 Y 10 0.387 0 -Contig60_chr28_30197166_30197364 92 T C 164.0 chr28 30197258 T 10 0 2 57 13 0 2 66 15 0 2 72 16 0 2 75 12 0 2 63 11 0 2 60 N 369 1.139 0 -Contig29_chr29_4726399_4727143 559 A T 163.0 chr29 4726955 A 15 0 2 72 18 0 2 81 18 0 
2 81 16 0 2 75 11 0 2 60 14 0 2 72 Y 161 3.114 0 -Contig1_chr30_5992217_5993068 106 C T 129.0 chr30 5992319 C 10 0 2 57 11 0 2 60 7 0 2 48 11 0 2 60 10 0 2 57 12 0 2 63 Y 76 1.079 0 -Contig165_chr30_25804389_25804926 190 T C 126.0 chr30 25804592 C 3 0 2 36 8 0 2 51 7 0 2 48 10 0 2 57 7 0 2 48 4 0 2 39 Y 113 0.329 0 -Contig38_chr31_5164423_5166573 2074 C T 134.0 chr31 5166501 T 13 0 2 66 10 0 2 57 17 0 2 78 11 0 2 60 17 0 2 78 10 0 2 57 Y 58 +99. 0 -Contig17_chr31_26433828_26434459 498 T C 9.79 chr31 26434322 T 18 0 2 81 10 0 2 57 15 0 2 72 13 0 2 66 16 0 2 75 15 0 2 72 Y 137 4.814 0 -Contig9_chr32_19479532_19479735 12 A G 20.7 chr32 19479544 A 1 0 2 30 2 0 2 33 1 0 2 30 5 0 2 42 3 0 2 36 3 0 2 36 N 17 +99. 0 -Contig30_chr32_25902721_25905783 208 C G 162.0 chr32 25902927 G 11 0 2 60 13 0 2 66 11 0 2 60 12 0 2 63 7 0 2 48 11 0 2 60 Y 145 0.322 2 -Contig18_chr33_22207246_22209159 1363 G T 51.5 chr33 22208619 - 16 0 2 75 8 0 2 51 11 0 2 60 10 0 2 57 15 0 2 72 12 0 2 63 Y 59 2.560 0 -Contig170_chr33_26189421_26189940 292 T C 98.4 chr33 26189703 T 21 0 2 90 13 0 2 66 15 0 2 72 13 0 2 66 19 0 2 84 13 0 2 66 Y 23 0.307 0 -Contig113_chr34_13341080_13341643 236 C T 90.7 chr34 13341316 C 4 0 2 39 2 0 2 33 8 0 2 51 4 0 2 39 8 0 2 51 3 0 2 36 Y 47 0.412 3 -Contig152_chr34_31794848_31795540 242 G A 93.2 chr34 31795093 G 11 0 2 60 24 0 2 99 17 0 2 78 15 0 2 72 18 0 2 81 17 0 2 78 Y 123 2.780 0 -Contig47_chr35_3666773_3667898 348 G T 124.0 chr35 3667121 G 9 0 2 54 20 0 2 87 18 0 2 81 15 0 2 72 12 0 2 63 14 0 2 69 Y 285 0.235 0 -Contig74_chr35_25394343_25394813 303 A T 221.0 chr35 25394646 G 23 0 2 96 15 0 2 72 25 0 2 105 7 7 1 49 18 0 2 81 16 0 2 75 Y 58 4.298 0 -Contig5_chr36_4562983_4563634 343 C T 151.0 chr36 4563324 T 20 0 2 87 20 0 2 87 23 0 2 96 24 0 2 99 9 0 2 54 8 0 2 51 Y 40 1.169 0 -Contig133_chr36_32954045_32955409 136 A G 116.0 chr36 32954182 A 16 0 2 75 15 0 2 72 20 0 2 87 11 0 2 60 18 0 2 81 13 0 2 66 Y 74 3.772 1 -Contig53_chr37_6665763_6665919 116 C T 111.0 chr37 
6665875 C 9 0 2 54 9 0 2 54 5 0 2 42 9 0 2 54 8 0 2 51 10 0 2 57 N 15 10.875 1 -Contig2_chr37_31197993_31198256 182 C T 39.6 chr37 31198171 T 6 0 2 45 10 0 2 57 7 0 2 48 9 0 2 54 10 0 2 57 12 0 2 63 N 2 0.595 0 -Contig7_chr38_12217200_12218387 1163 A T 44.4 chr38 12218353 A 11 0 2 60 13 0 2 66 17 0 2 78 10 0 2 57 11 0 2 60 11 0 2 60 Y 67 +99. 0 -Contig265_chrX_2689247_2689484 114 C G 103.0 chrX 2689356 C 11 0 2 60 9 0 2 54 13 0 2 66 16 0 2 75 14 0 2 69 10 0 2 57 N 2 9.232 1 -Contig113_chrX_26287829_26288398 385 C T 59.6 chrX 26288213 C 9 0 2 54 9 0 2 54 17 0 2 78 11 0 2 60 3 8 1 44 4 0 2 39 N 13 0.077 0 -Contig90_chrX_57430715_57431566 548 C T 116.0 chrX 57431266 T 9 0 2 54 18 0 2 81 13 0 2 66 14 0 2 69 8 0 2 54 7 0 2 48 Y 261 0.154 1 -Contig133_chrX_84833782_84834125 182 G A 69.7 chrX 84833962 G 5 0 2 42 18 0 2 81 12 0 2 63 19 0 2 84 6 3 1 27 7 0 2 48 N 619 0.278 0
--- a/test-data/test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp Fri Sep 28 11:34:31 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist", -#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"} -Contig47_chr2_25470778_25471576 126 G A 888.0 chr2 25470896 G 12 0 2 63 14 0 2 69 14 0 2 69 10 0 2 57 18 0 2 81 13 0 2 66 N 11 0.289 1 -Contig73_chr9_29451535_29452248 616 A G 24.7 chr9 29452127 G 4 0 2 39 7 0 2 48 1 0 2 30 4 0 2 39 7 0 2 48 6 0 2 45 N 49 0.448 4 -Contig69_chr10_40547265_40548153 371 G A 58.1 chr10 40547649 A 9 0 2 54 8 0 2 51 8 0 2 51 9 0 2 54 4 0 2 39 5 0 2 42 Y 20 0.138 4 -Contig99_chr17_26021506_26022200 505 C T 88.8 chr17 26022017 T 15 0 2 72 13 0 2 66 19 0 2 84 9 0 2 54 10 0 2 57 11 0 2 60 Y 1 0.172 1 -Contig27_chr17_61713766_61716585 1056 G C 40.0 chr17 61714821 G 4 0 2 39 8 0 2 51 10 0 2 57 6 0 2 45 6 0 2 45 3 0 2 36 N 6 2.200 4 -Contig26_chr22_57817664_57819633 1453 A G 150.0 chr22 57819121 G 9 0 2 54 9 0 2 54 13 0 2 66 15 0 2 72 11 0 2 60 14 0 2 69 N 15 0.471 1 -Contig103_chr25_38891221_38892140 407 G A 131.0 chr25 38891644 G 8 0 2 51 14 0 2 69 18 0 2 81 8 0 2 51 8 0 2 51 11 0 2 60 Y 149 0.167 4 -Contig64_chr27_34654435_34654621 132 C A 115.0 chr27 34654567 T 2 0 2 33 2 0 2 33 5 0 2 42 3 0 2 36 3 0 2 36 8 0 2 51 N 12 0.297 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.heterochromatic.loc.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,2 @@ +# ref_species heterochromatic_file +#canFam2 /galaxy/local_data/genome_diversity/dpmix/canFam2_heterochrom.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.oscar.loc.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,4 @@ +#<species> <data_file> +#hg19 /galaxy/local_data/genome_diversity/oscar/hsa_ENSEMBLcKEGGctpthw.tsv +#bosTau4 /galaxy/local_data/genome_diversity/oscar/bta_ENSEMBLcKEGGctpthw.tsv +#canFam2 /galaxy/local_data/genome_diversity/oscar/cfa_ENSEMBLcKEGGctpthw.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.pathways.txt.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,721 @@ +hg19 hsa00010 hsa00010 - Glycolysis/ Gluconeogenesis +hg19 hsa00020 hsa00020 - Citratecycle (TCA cycle) +hg19 hsa00030 hsa00030 - Pentosephosphate pathway +hg19 hsa00040 hsa00040 - Pentoseand glucuronate interconversions +hg19 hsa00051 hsa00051 - Fructoseand mannose metabolism +hg19 hsa00052 hsa00052 - Galactosemetabolism +hg19 hsa00053 hsa00053 - Ascorbateand aldarate metabolism +hg19 hsa00061 hsa00061 - Fattyacid biosynthesis +hg19 hsa00062 hsa00062 - Fattyacid elongation +hg19 hsa00071 hsa00071 - Fattyacid metabolism +hg19 hsa00072 hsa00072 - Synthesisand degradation of ketone bodies +hg19 hsa00100 hsa00100 - Steroidbiosynthesis +hg19 hsa00120 hsa00120 - Primarybile acid biosynthesis +hg19 hsa00130 hsa00130 - Ubiquinoneand other terpenoid-quinone biosynthesis +hg19 hsa00140 hsa00140 - Steroidhormone biosynthesis +hg19 hsa00190 hsa00190 - Oxidativephosphorylation +hg19 hsa00230 hsa00230 - Purinemetabolism +hg19 hsa00232 hsa00232 - Caffeinemetabolism +hg19 hsa00240 hsa00240 - Pyrimidinemetabolism +hg19 hsa00250 hsa00250 - Alanine,aspartate and glutamate metabolism +hg19 hsa00260 hsa00260 - Glycine,serine and threonine metabolism +hg19 hsa00270 hsa00270 - Cysteineand methionine metabolism +hg19 hsa00280 hsa00280 - Valine,leucine and isoleucine degradation +hg19 hsa00290 hsa00290 - Valine,leucine and isoleucine biosynthesis +hg19 hsa00300 hsa00300 - Lysinebiosynthesis +hg19 hsa00310 hsa00310 - Lysinedegradation +hg19 hsa00330 hsa00330 - Arginineand proline metabolism +hg19 hsa00340 hsa00340 - Histidinemetabolism +hg19 hsa00350 hsa00350 - Tyrosinemetabolism +hg19 hsa00360 hsa00360 - Phenylalaninemetabolism +hg19 hsa00380 hsa00380 - Tryptophanmetabolism +hg19 hsa00400 hsa00400 - Phenylalanine,tyrosine and tryptophan biosynthesis +hg19 hsa00410 hsa00410 - beta-Alaninemetabolism +hg19 hsa00430 hsa00430 - Taurineand hypotaurine metabolism 
+hg19 hsa00450 hsa00450 - Selenocompoundmetabolism +hg19 hsa00460 hsa00460 - Cyanoaminoacid metabolism +hg19 hsa00480 hsa00480 - Glutathionemetabolism +hg19 hsa00500 hsa00500 - Starchand sucrose metabolism +hg19 hsa00510 hsa00510 - N-Glycanbiosynthesis +hg19 hsa00511 hsa00511 - Otherglycan degradation +hg19 hsa00512 hsa00512 - Mucintype O-Glycan biosynthesis +hg19 hsa00514 hsa00514 - Othertypes of O-glycan biosynthesis +hg19 hsa00520 hsa00520 - Aminosugar and nucleotide sugar metabolism +hg19 hsa00524 hsa00524 - Butirosinand neomycin biosynthesis +hg19 hsa00531 hsa00531 - Glycosaminoglycandegradation +hg19 hsa00532 hsa00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate +hg19 hsa00533 hsa00533 - Glycosaminoglycanbiosynthesis - keratan sulfate +hg19 hsa00534 hsa00534 - Glycosaminoglycanbiosynthesis - heparan sulfate +hg19 hsa00561 hsa00561 - Glycerolipidmetabolism +hg19 hsa00562 hsa00562 - Inositolphosphate metabolism +hg19 hsa00563 hsa00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis +hg19 hsa00564 hsa00564 - Glycerophospholipidmetabolism +hg19 hsa00565 hsa00565 - Etherlipid metabolism +hg19 hsa00590 hsa00590 - Arachidonicacid metabolism +hg19 hsa00591 hsa00591 - Linoleicacid metabolism +hg19 hsa00592 hsa00592 - alpha-Linolenicacid metabolism +hg19 hsa00600 hsa00600 - Sphingolipidmetabolism +hg19 hsa00601 hsa00601 - Glycosphingolipidbiosynthesis - lacto and neolacto series +hg19 hsa00603 hsa00603 - Glycosphingolipidbiosynthesis - globo series +hg19 hsa00604 hsa00604 - Glycosphingolipidbiosynthesis - ganglio series +hg19 hsa00620 hsa00620 - Pyruvatemetabolism +hg19 hsa00630 hsa00630 - Glyoxylateand dicarboxylate metabolism +hg19 hsa00640 hsa00640 - Propanoatemetabolism +hg19 hsa00650 hsa00650 - Butanoatemetabolism +hg19 hsa00670 hsa00670 - Onecarbon pool by folate +hg19 hsa00730 hsa00730 - Thiaminemetabolism +hg19 hsa00740 hsa00740 - Riboflavinmetabolism +hg19 hsa00750 hsa00750 - VitaminB6 metabolism +hg19 hsa00760 hsa00760 - Nicotinateand 
nicotinamide metabolism +hg19 hsa00770 hsa00770 - Pantothenateand CoA biosynthesis +hg19 hsa00780 hsa00780 - Biotinmetabolism +hg19 hsa00785 hsa00785 - Lipoicacid metabolism +hg19 hsa00790 hsa00790 - Folatebiosynthesis +hg19 hsa00830 hsa00830 - Retinolmetabolism +hg19 hsa00860 hsa00860 - Porphyrinand chlorophyll metabolism +hg19 hsa00900 hsa00900 - Terpenoidbackbone biosynthesis +hg19 hsa00910 hsa00910 - Nitrogenmetabolism +hg19 hsa00920 hsa00920 - Sulfurmetabolism +hg19 hsa00970 hsa00970 - Aminoacyl-tRNAbiosynthesis +hg19 hsa00980 hsa00980 - Metabolismof xenobiotics by cytochrome P450 +hg19 hsa00982 hsa00982 - Drugmetabolism - cytochrome P450 +hg19 hsa00983 hsa00983 - Drugmetabolism - other enzymes +hg19 hsa01040 hsa01040 - Biosynthesisof unsaturated fatty acids +hg19 hsa01100 hsa01100 - Metabolicpathways +hg19 hsa02010 hsa02010 - ABCtransporters +hg19 hsa03008 hsa03008 - Ribosomebiogenesis in eukaryotes +hg19 hsa03010 hsa03010 - Ribosome +hg19 hsa03013 hsa03013 - RNAtransport +hg19 hsa03015 hsa03015 - mRNAsurveillance pathway +hg19 hsa03018 hsa03018 - RNAdegradation +hg19 hsa03020 hsa03020 - RNApolymerase +hg19 hsa03022 hsa03022 - Basaltranscription factors +hg19 hsa03030 hsa03030 - DNAreplication +hg19 hsa03040 hsa03040 - Spliceosome +hg19 hsa03050 hsa03050 - Proteasome +hg19 hsa03060 hsa03060 - Proteinexport +hg19 hsa03320 hsa03320 - PPARsignaling pathway +hg19 hsa03410 hsa03410 - Baseexcision repair +hg19 hsa03420 hsa03420 - Nucleotideexcision repair +hg19 hsa03430 hsa03430 - Mismatchrepair +hg19 hsa03440 hsa03440 - Homologousrecombination +hg19 hsa03450 hsa03450 - Non-homologousend-joining +hg19 hsa03460 hsa03460 - Fanconianemia pathway +hg19 hsa04010 hsa04010 - MAPKsignaling pathway +hg19 hsa04012 hsa04012 - ErbBsignaling pathway +hg19 hsa04020 hsa04020 - Calciumsignaling pathway +hg19 hsa04060 hsa04060 - Cytokine-cytokinereceptor interaction +hg19 hsa04062 hsa04062 - Chemokinesignaling pathway +hg19 hsa04070 hsa04070 - Phosphatidylinositolsignaling system 
+hg19 hsa04080 hsa04080 - Neuroactiveligand-receptor interaction +hg19 hsa04110 hsa04110 - Cellcycle +hg19 hsa04114 hsa04114 - Oocytemeiosis +hg19 hsa04115 hsa04115 - p53signaling pathway +hg19 hsa04120 hsa04120 - Ubiquitinmediated proteolysis +hg19 hsa04122 hsa04122 - Sulfurrelay system +hg19 hsa04130 hsa04130 - SNAREinteractions in vesicular transport +hg19 hsa04140 hsa04140 - Regulationof autophagy +hg19 hsa04141 hsa04141 - Proteinprocessing in endoplasmic reticulum +hg19 hsa04142 hsa04142 - Lysosome +hg19 hsa04144 hsa04144 - Endocytosis +hg19 hsa04145 hsa04145 - Phagosome +hg19 hsa04146 hsa04146 - Peroxisome +hg19 hsa04150 hsa04150 - mTORsignaling pathway +hg19 hsa04210 hsa04210 - Apoptosis +hg19 hsa04260 hsa04260 - Cardiacmuscle contraction +hg19 hsa04270 hsa04270 - Vascularsmooth muscle contraction +hg19 hsa04310 hsa04310 - Wntsignaling pathway +hg19 hsa04320 hsa04320 - Dorso-ventralaxis formation +hg19 hsa04330 hsa04330 - Notchsignaling pathway +hg19 hsa04340 hsa04340 - Hedgehogsignaling pathway +hg19 hsa04350 hsa04350 - TGF-betasignaling pathway +hg19 hsa04360 hsa04360 - Axonguidance +hg19 hsa04370 hsa04370 - VEGFsignaling pathway +hg19 hsa04380 hsa04380 - Osteoclastdifferentiation +hg19 hsa04510 hsa04510 - Focaladhesion +hg19 hsa04512 hsa04512 - ECM-receptorinteraction +hg19 hsa04514 hsa04514 - Celladhesion molecules (CAMs) +hg19 hsa04520 hsa04520 - Adherensjunction +hg19 hsa04530 hsa04530 - Tightjunction +hg19 hsa04540 hsa04540 - Gapjunction +hg19 hsa04610 hsa04610 - Complementand coagulation cascades +hg19 hsa04612 hsa04612 - Antigenprocessing and presentation +hg19 hsa04614 hsa04614 - Renin-angiotensinsystem +hg19 hsa04620 hsa04620 - Toll-likereceptor signaling pathway +hg19 hsa04621 hsa04621 - NOD-likereceptor signaling pathway +hg19 hsa04622 hsa04622 - RIG-I-likereceptor signaling pathway +hg19 hsa04623 hsa04623 - CytosolicDNA-sensing pathway +hg19 hsa04630 hsa04630 - Jak-STATsignaling pathway +hg19 hsa04640 hsa04640 - Hematopoieticcell lineage +hg19 
hsa04650 hsa04650 - Naturalkiller cell mediated cytotoxicity +hg19 hsa04660 hsa04660 - Tcell receptor signaling pathway +hg19 hsa04662 hsa04662 - Bcell receptor signaling pathway +hg19 hsa04664 hsa04664 - Fcepsilon RI signaling pathway +hg19 hsa04666 hsa04666 - Fcgamma R-mediated phagocytosis +hg19 hsa04670 hsa04670 - Leukocytetransendothelial migration +hg19 hsa04672 hsa04672 - Intestinalimmune network for IgA production +hg19 hsa04710 hsa04710 - Circadianrhythm - mammal +hg19 hsa04720 hsa04720 - Long-termpotentiation +hg19 hsa04721 hsa04721 - Synapticvesicle cycle +hg19 hsa04722 hsa04722 - Neurotrophinsignaling pathway +hg19 hsa04724 hsa04724 - Glutamatergicsynapse +hg19 hsa04725 hsa04725 - Cholinergicsynapse +hg19 hsa04727 hsa04727 - GABAergicsynapse +hg19 hsa04728 hsa04728 - Dopaminergicsynapse +hg19 hsa04730 hsa04730 - Long-termdepression +hg19 hsa04740 hsa04740 - Olfactorytransduction +hg19 hsa04742 hsa04742 - Tastetransduction +hg19 hsa04744 hsa04744 - Phototransduction +hg19 hsa04810 hsa04810 - Regulationof actin cytoskeleton +hg19 hsa04910 hsa04910 - Insulinsignaling pathway +hg19 hsa04912 hsa04912 - GnRHsignaling pathway +hg19 hsa04914 hsa04914 - Progesterone-mediatedoocyte maturation +hg19 hsa04916 hsa04916 - Melanogenesis +hg19 hsa04920 hsa04920 - Adipocytokinesignaling pathway +hg19 hsa04930 hsa04930 - TypeII diabetes mellitus +hg19 hsa04940 hsa04940 - TypeI diabetes mellitus +hg19 hsa04950 hsa04950 - Maturityonset diabetes of the young +hg19 hsa04960 hsa04960 - Aldosterone-regulatedsodium reabsorption +hg19 hsa04961 hsa04961 - Endocrineand other factor-regulated calcium reabsorption +hg19 hsa04962 hsa04962 - Vasopressin-regulatedwater reabsorption +hg19 hsa04964 hsa04964 - Proximaltubule bicarbonate reclamation +hg19 hsa04966 hsa04966 - Collectingduct acid secretion +hg19 hsa04970 hsa04970 - Salivarysecretion +hg19 hsa04971 hsa04971 - Gastricacid secretion +hg19 hsa04972 hsa04972 - Pancreaticsecretion +hg19 hsa04973 hsa04973 - Carbohydratedigestion 
and absorption +hg19 hsa04974 hsa04974 - Proteindigestion and absorption +hg19 hsa04975 hsa04975 - Fatdigestion and absorption +hg19 hsa04976 hsa04976 - Bilesecretion +hg19 hsa04977 hsa04977 - Vitamindigestion and absorption +hg19 hsa04978 hsa04978 - Mineralabsorption +hg19 hsa05010 hsa05010 - Alzheimer'sdisease +hg19 hsa05012 hsa05012 - Parkinson'sdisease +hg19 hsa05014 hsa05014 - Amyotrophiclateral sclerosis (ALS) +hg19 hsa05016 hsa05016 - Huntington'sdisease +hg19 hsa05020 hsa05020 - Priondiseases +hg19 hsa05100 hsa05100 - Bacterialinvasion of epithelial cells +hg19 hsa05132 hsa05132 - Salmonellainfection +hg19 hsa05133 hsa05133 - Pertussis +hg19 hsa05134 hsa05134 - Legionellosis +hg19 hsa05140 hsa05140 - Leishmaniasis +hg19 hsa05142 hsa05142 - Chagasdisease (American trypanosomiasis) +hg19 hsa05143 hsa05143 - Africantrypanosomiasis +hg19 hsa05144 hsa05144 - Malaria +hg19 hsa05145 hsa05145 - Toxoplasmosis +hg19 hsa05146 hsa05146 - Amoebiasis +hg19 hsa05150 hsa05150 - Staphylococcusaureus infection +hg19 hsa05152 hsa05152 - Tuberculosis +hg19 hsa05160 hsa05160 - HepatitisC +hg19 hsa05162 hsa05162 - Measles +hg19 hsa05164 hsa05164 - InfluenzaA +hg19 hsa05166 hsa05166 - HTLV-Iinfection +hg19 hsa05168 hsa05168 - Herpessimplex infection +hg19 hsa05200 hsa05200 - Pathwaysin cancer +hg19 hsa05202 hsa05202 - Transcriptionalmisregulation in cancers +hg19 hsa05210 hsa05210 - Colorectalcancer +hg19 hsa05211 hsa05211 - Renalcell carcinoma +hg19 hsa05212 hsa05212 - Pancreaticcancer +hg19 hsa05213 hsa05213 - Endometrialcancer +hg19 hsa05214 hsa05214 - Glioma +hg19 hsa05215 hsa05215 - Prostatecancer +hg19 hsa05216 hsa05216 - Thyroidcancer +hg19 hsa05217 hsa05217 - Basalcell carcinoma +hg19 hsa05218 hsa05218 - Melanoma +hg19 hsa05219 hsa05219 - Bladdercancer +hg19 hsa05220 hsa05220 - Chronicmyeloid leukemia +hg19 hsa05221 hsa05221 - Acutemyeloid leukemia +hg19 hsa05222 hsa05222 - Smallcell lung cancer +hg19 hsa05223 hsa05223 - Non-smallcell lung cancer +hg19 hsa05310 hsa05310 - 
Asthma +hg19 hsa05320 hsa05320 - Autoimmunethyroid disease +hg19 hsa05322 hsa05322 - Systemiclupus erythematosus +hg19 hsa05323 hsa05323 - Rheumatoidarthritis +hg19 hsa05330 hsa05330 - Allograftrejection +hg19 hsa05332 hsa05332 - Graft-versus-hostdisease +hg19 hsa05340 hsa05340 - Primaryimmunodeficiency +hg19 hsa05410 hsa05410 - Hypertrophiccardiomyopathy (HCM) +hg19 hsa05412 hsa05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC) +hg19 hsa05414 hsa05414 - Dilatedcardiomyopathy +hg19 hsa05416 hsa05416 - Viralmyocarditis +canFam2 cfa00010 cfa00010 - Glycolysis/ Gluconeogenesis +canFam2 cfa00020 cfa00020 - Citratecycle (TCA cycle) +canFam2 cfa00030 cfa00030 - Pentosephosphate pathway +canFam2 cfa00040 cfa00040 - Pentoseand glucuronate interconversions +canFam2 cfa00051 cfa00051 - Fructoseand mannose metabolism +canFam2 cfa00052 cfa00052 - Galactosemetabolism +canFam2 cfa00053 cfa00053 - Ascorbateand aldarate metabolism +canFam2 cfa00061 cfa00061 - Fattyacid biosynthesis +canFam2 cfa00062 cfa00062 - Fattyacid elongation in mitochondria +canFam2 cfa00071 cfa00071 - Fattyacid metabolism +canFam2 cfa00072 cfa00072 - Synthesisand degradation of ketone bodies +canFam2 cfa00100 cfa00100 - Steroidbiosynthesis +canFam2 cfa00120 cfa00120 - Primarybile acid biosynthesis +canFam2 cfa00130 cfa00130 - Ubiquinoneand other terpenoid-quinone biosynthesis +canFam2 cfa00140 cfa00140 - Steroidhormone biosynthesis +canFam2 cfa00190 cfa00190 - Oxidativephosphorylation +canFam2 cfa00230 cfa00230 - Purinemetabolism +canFam2 cfa00232 cfa00232 - Caffeinemetabolism +canFam2 cfa00240 cfa00240 - Pyrimidinemetabolism +canFam2 cfa00250 cfa00250 - Alanine,aspartate and glutamate metabolism +canFam2 cfa00260 cfa00260 - Glycine,serine and threonine metabolism +canFam2 cfa00270 cfa00270 - Cysteineand methionine metabolism +canFam2 cfa00280 cfa00280 - Valine,leucine and isoleucine degradation +canFam2 cfa00290 cfa00290 - Valine,leucine and isoleucine biosynthesis +canFam2 cfa00300 cfa00300 - 
Lysinebiosynthesis +canFam2 cfa00310 cfa00310 - Lysinedegradation +canFam2 cfa00330 cfa00330 - Arginineand proline metabolism +canFam2 cfa00340 cfa00340 - Histidinemetabolism +canFam2 cfa00350 cfa00350 - Tyrosinemetabolism +canFam2 cfa00360 cfa00360 - Phenylalaninemetabolism +canFam2 cfa00380 cfa00380 - Tryptophanmetabolism +canFam2 cfa00400 cfa00400 - Phenylalanine,tyrosine and tryptophan biosynthesis +canFam2 cfa00410 cfa00410 - beta-Alaninemetabolism +canFam2 cfa00430 cfa00430 - Taurineand hypotaurine metabolism +canFam2 cfa00450 cfa00450 - Selenocompoundmetabolism +canFam2 cfa00460 cfa00460 - Cyanoaminoacid metabolism +canFam2 cfa00472 cfa00472 - D-Arginineand D-ornithine metabolism +canFam2 cfa00480 cfa00480 - Glutathionemetabolism +canFam2 cfa00500 cfa00500 - Starchand sucrose metabolism +canFam2 cfa00510 cfa00510 - N-Glycanbiosynthesis +canFam2 cfa00511 cfa00511 - Otherglycan degradation +canFam2 cfa00512 cfa00512 - Mucintype O-Glycan biosynthesis +canFam2 cfa00514 cfa00514 - Othertypes of O-glycan biosynthesis +canFam2 cfa00520 cfa00520 - Aminosugar and nucleotide sugar metabolism +canFam2 cfa00531 cfa00531 - Glycosaminoglycandegradation +canFam2 cfa00532 cfa00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate +canFam2 cfa00533 cfa00533 - Glycosaminoglycanbiosynthesis - keratan sulfate +canFam2 cfa00534 cfa00534 - Glycosaminoglycanbiosynthesis - heparan sulfate +canFam2 cfa00561 cfa00561 - Glycerolipidmetabolism +canFam2 cfa00562 cfa00562 - Inositolphosphate metabolism +canFam2 cfa00563 cfa00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis +canFam2 cfa00564 cfa00564 - Glycerophospholipidmetabolism +canFam2 cfa00565 cfa00565 - Etherlipid metabolism +canFam2 cfa00590 cfa00590 - Arachidonicacid metabolism +canFam2 cfa00591 cfa00591 - Linoleicacid metabolism +canFam2 cfa00592 cfa00592 - alpha-Linolenicacid metabolism +canFam2 cfa00600 cfa00600 - Sphingolipidmetabolism +canFam2 cfa00601 cfa00601 - Glycosphingolipidbiosynthesis - lacto and 
neolacto series +canFam2 cfa00603 cfa00603 - Glycosphingolipidbiosynthesis - globo series +canFam2 cfa00604 cfa00604 - Glycosphingolipidbiosynthesis - ganglio series +canFam2 cfa00620 cfa00620 - Pyruvatemetabolism +canFam2 cfa00630 cfa00630 - Glyoxylateand dicarboxylate metabolism +canFam2 cfa00640 cfa00640 - Propanoatemetabolism +canFam2 cfa00650 cfa00650 - Butanoatemetabolism +canFam2 cfa00670 cfa00670 - Onecarbon pool by folate +canFam2 cfa00730 cfa00730 - Thiaminemetabolism +canFam2 cfa00740 cfa00740 - Riboflavinmetabolism +canFam2 cfa00750 cfa00750 - VitaminB6 metabolism +canFam2 cfa00760 cfa00760 - Nicotinateand nicotinamide metabolism +canFam2 cfa00770 cfa00770 - Pantothenateand CoA biosynthesis +canFam2 cfa00780 cfa00780 - Biotinmetabolism +canFam2 cfa00785 cfa00785 - Lipoicacid metabolism +canFam2 cfa00790 cfa00790 - Folatebiosynthesis +canFam2 cfa00830 cfa00830 - Retinolmetabolism +canFam2 cfa00860 cfa00860 - Porphyrinand chlorophyll metabolism +canFam2 cfa00900 cfa00900 - Terpenoidbackbone biosynthesis +canFam2 cfa00910 cfa00910 - Nitrogenmetabolism +canFam2 cfa00920 cfa00920 - Sulfurmetabolism +canFam2 cfa00970 cfa00970 - Aminoacyl-tRNAbiosynthesis +canFam2 cfa00980 cfa00980 - Metabolismof xenobiotics by cytochrome P450 +canFam2 cfa00982 cfa00982 - Drugmetabolism - cytochrome P450 +canFam2 cfa00983 cfa00983 - Drugmetabolism - other enzymes +canFam2 cfa01040 cfa01040 - Biosynthesisof unsaturated fatty acids +canFam2 cfa01100 cfa01100 - Metabolicpathways +canFam2 cfa02010 cfa02010 - ABCtransporters +canFam2 cfa03008 cfa03008 - Ribosomebiogenesis in eukaryotes +canFam2 cfa03010 cfa03010 - Ribosome +canFam2 cfa03013 cfa03013 - RNAtransport +canFam2 cfa03015 cfa03015 - mRNAsurveillance pathway +canFam2 cfa03018 cfa03018 - RNAdegradation +canFam2 cfa03020 cfa03020 - RNApolymerase +canFam2 cfa03022 cfa03022 - Basaltranscription factors +canFam2 cfa03030 cfa03030 - DNAreplication +canFam2 cfa03040 cfa03040 - Spliceosome +canFam2 cfa03050 cfa03050 - Proteasome 
+canFam2 cfa03060 cfa03060 - Proteinexport +canFam2 cfa03320 cfa03320 - PPARsignaling pathway +canFam2 cfa03410 cfa03410 - Baseexcision repair +canFam2 cfa03420 cfa03420 - Nucleotideexcision repair +canFam2 cfa03430 cfa03430 - Mismatchrepair +canFam2 cfa03440 cfa03440 - Homologousrecombination +canFam2 cfa03450 cfa03450 - Non-homologousend-joining +canFam2 cfa03460 cfa03460 - Fanconianemia pathway +canFam2 cfa04010 cfa04010 - MAPKsignaling pathway +canFam2 cfa04012 cfa04012 - ErbBsignaling pathway +canFam2 cfa04020 cfa04020 - Calciumsignaling pathway +canFam2 cfa04060 cfa04060 - Cytokine-cytokinereceptor interaction +canFam2 cfa04062 cfa04062 - Chemokinesignaling pathway +canFam2 cfa04070 cfa04070 - Phosphatidylinositolsignaling system +canFam2 cfa04080 cfa04080 - Neuroactiveligand-receptor interaction +canFam2 cfa04110 cfa04110 - Cellcycle +canFam2 cfa04114 cfa04114 - Oocytemeiosis +canFam2 cfa04115 cfa04115 - p53signaling pathway +canFam2 cfa04120 cfa04120 - Ubiquitinmediated proteolysis +canFam2 cfa04122 cfa04122 - Sulfurrelay system +canFam2 cfa04130 cfa04130 - SNAREinteractions in vesicular transport +canFam2 cfa04140 cfa04140 - Regulationof autophagy +canFam2 cfa04141 cfa04141 - Proteinprocessing in endoplasmic reticulum +canFam2 cfa04142 cfa04142 - Lysosome +canFam2 cfa04144 cfa04144 - Endocytosis +canFam2 cfa04145 cfa04145 - Phagosome +canFam2 cfa04146 cfa04146 - Peroxisome +canFam2 cfa04150 cfa04150 - mTORsignaling pathway +canFam2 cfa04210 cfa04210 - Apoptosis +canFam2 cfa04260 cfa04260 - Cardiacmuscle contraction +canFam2 cfa04270 cfa04270 - Vascularsmooth muscle contraction +canFam2 cfa04310 cfa04310 - Wntsignaling pathway +canFam2 cfa04320 cfa04320 - Dorso-ventralaxis formation +canFam2 cfa04330 cfa04330 - Notchsignaling pathway +canFam2 cfa04340 cfa04340 - Hedgehogsignaling pathway +canFam2 cfa04350 cfa04350 - TGF-betasignaling pathway +canFam2 cfa04360 cfa04360 - Axonguidance +canFam2 cfa04370 cfa04370 - VEGFsignaling pathway +canFam2 cfa04380 
cfa04380 - Osteoclastdifferentiation +canFam2 cfa04510 cfa04510 - Focaladhesion +canFam2 cfa04512 cfa04512 - ECM-receptorinteraction +canFam2 cfa04514 cfa04514 - Celladhesion molecules (CAMs) +canFam2 cfa04520 cfa04520 - Adherensjunction +canFam2 cfa04530 cfa04530 - Tightjunction +canFam2 cfa04540 cfa04540 - Gapjunction +canFam2 cfa04610 cfa04610 - Complementand coagulation cascades +canFam2 cfa04612 cfa04612 - Antigenprocessing and presentation +canFam2 cfa04614 cfa04614 - Renin-angiotensinsystem +canFam2 cfa04620 cfa04620 - Toll-likereceptor signaling pathway +canFam2 cfa04621 cfa04621 - NOD-likereceptor signaling pathway +canFam2 cfa04622 cfa04622 - RIG-I-likereceptor signaling pathway +canFam2 cfa04623 cfa04623 - CytosolicDNA-sensing pathway +canFam2 cfa04630 cfa04630 - Jak-STATsignaling pathway +canFam2 cfa04640 cfa04640 - Hematopoieticcell lineage +canFam2 cfa04650 cfa04650 - Naturalkiller cell mediated cytotoxicity +canFam2 cfa04660 cfa04660 - Tcell receptor signaling pathway +canFam2 cfa04662 cfa04662 - Bcell receptor signaling pathway +canFam2 cfa04664 cfa04664 - Fcepsilon RI signaling pathway +canFam2 cfa04666 cfa04666 - Fcgamma R-mediated phagocytosis +canFam2 cfa04670 cfa04670 - Leukocytetransendothelial migration +canFam2 cfa04672 cfa04672 - Intestinalimmune network for IgA production +canFam2 cfa04710 cfa04710 - Circadianrhythm - mammal +canFam2 cfa04720 cfa04720 - Long-termpotentiation +canFam2 cfa04721 cfa04721 - Synapticvesicle cycle +canFam2 cfa04722 cfa04722 - Neurotrophinsignaling pathway +canFam2 cfa04724 cfa04724 - Glutamatergicsynapse +canFam2 cfa04725 cfa04725 - Cholinergicsynapse +canFam2 cfa04727 cfa04727 - GABAergicsynapse +canFam2 cfa04728 cfa04728 - Dopaminergicsynapse +canFam2 cfa04730 cfa04730 - Long-termdepression +canFam2 cfa04740 cfa04740 - Olfactorytransduction +canFam2 cfa04742 cfa04742 - Tastetransduction +canFam2 cfa04744 cfa04744 - Phototransduction +canFam2 cfa04810 cfa04810 - Regulationof actin cytoskeleton +canFam2 cfa04910 
cfa04910 - Insulinsignaling pathway +canFam2 cfa04912 cfa04912 - GnRHsignaling pathway +canFam2 cfa04914 cfa04914 - Progesterone-mediatedoocyte maturation +canFam2 cfa04916 cfa04916 - Melanogenesis +canFam2 cfa04920 cfa04920 - Adipocytokinesignaling pathway +canFam2 cfa04930 cfa04930 - TypeII diabetes mellitus +canFam2 cfa04940 cfa04940 - TypeI diabetes mellitus +canFam2 cfa04950 cfa04950 - Maturityonset diabetes of the young +canFam2 cfa04960 cfa04960 - Aldosterone-regulatedsodium reabsorption +canFam2 cfa04961 cfa04961 - Endocrineand other factor-regulated calcium reabsorption +canFam2 cfa04962 cfa04962 - Vasopressin-regulatedwater reabsorption +canFam2 cfa04964 cfa04964 - Proximaltubule bicarbonate reclamation +canFam2 cfa04966 cfa04966 - Collectingduct acid secretion +canFam2 cfa04970 cfa04970 - Salivarysecretion +canFam2 cfa04971 cfa04971 - Gastricacid secretion +canFam2 cfa04972 cfa04972 - Pancreaticsecretion +canFam2 cfa04973 cfa04973 - Carbohydratedigestion and absorption +canFam2 cfa04974 cfa04974 - Proteindigestion and absorption +canFam2 cfa04975 cfa04975 - Fatdigestion and absorption +canFam2 cfa04976 cfa04976 - Bilesecretion +canFam2 cfa04977 cfa04977 - Vitamindigestion and absorption +canFam2 cfa04978 cfa04978 - Mineralabsorption +canFam2 cfa05010 cfa05010 - Alzheimer'sdisease +canFam2 cfa05012 cfa05012 - Parkinson'sdisease +canFam2 cfa05014 cfa05014 - Amyotrophiclateral sclerosis (ALS) +canFam2 cfa05016 cfa05016 - Huntington'sdisease +canFam2 cfa05020 cfa05020 - Priondiseases +canFam2 cfa05100 cfa05100 - Bacterialinvasion of epithelial cells +canFam2 cfa05132 cfa05132 - Salmonellainfection +canFam2 cfa05133 cfa05133 - Pertussis +canFam2 cfa05134 cfa05134 - Legionellosis +canFam2 cfa05140 cfa05140 - Leishmaniasis +canFam2 cfa05142 cfa05142 - Chagasdisease (American trypanosomiasis) +canFam2 cfa05143 cfa05143 - Africantrypanosomiasis +canFam2 cfa05144 cfa05144 - Malaria +canFam2 cfa05145 cfa05145 - Toxoplasmosis +canFam2 cfa05146 cfa05146 - Amoebiasis 
+canFam2 cfa05150 cfa05150 - Staphylococcusaureus infection +canFam2 cfa05152 cfa05152 - Tuberculosis +canFam2 cfa05160 cfa05160 - HepatitisC +canFam2 cfa05162 cfa05162 - Measles +canFam2 cfa05164 cfa05164 - InfluenzaA +canFam2 cfa05166 cfa05166 - HTLV-Iinfection +canFam2 cfa05168 cfa05168 - Herpessimplex infection +canFam2 cfa05200 cfa05200 - Pathwaysin cancer +canFam2 cfa05210 cfa05210 - Colorectalcancer +canFam2 cfa05211 cfa05211 - Renalcell carcinoma +canFam2 cfa05212 cfa05212 - Pancreaticcancer +canFam2 cfa05213 cfa05213 - Endometrialcancer +canFam2 cfa05214 cfa05214 - Glioma +canFam2 cfa05215 cfa05215 - Prostatecancer +canFam2 cfa05216 cfa05216 - Thyroidcancer +canFam2 cfa05217 cfa05217 - Basalcell carcinoma +canFam2 cfa05218 cfa05218 - Melanoma +canFam2 cfa05219 cfa05219 - Bladdercancer +canFam2 cfa05220 cfa05220 - Chronicmyeloid leukemia +canFam2 cfa05221 cfa05221 - Acutemyeloid leukemia +canFam2 cfa05222 cfa05222 - Smallcell lung cancer +canFam2 cfa05223 cfa05223 - Non-smallcell lung cancer +canFam2 cfa05310 cfa05310 - Asthma +canFam2 cfa05320 cfa05320 - Autoimmunethyroid disease +canFam2 cfa05322 cfa05322 - Systemiclupus erythematosus +canFam2 cfa05323 cfa05323 - Rheumatoidarthritis +canFam2 cfa05330 cfa05330 - Allograftrejection +canFam2 cfa05332 cfa05332 - Graft-versus-hostdisease +canFam2 cfa05340 cfa05340 - Primaryimmunodeficiency +canFam2 cfa05410 cfa05410 - Hypertrophiccardiomyopathy (HCM) +canFam2 cfa05412 cfa05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC) +canFam2 cfa05414 cfa05414 - Dilatedcardiomyopathy +canFam2 cfa05416 cfa05416 - Viralmyocarditis +bosTau4 bta00010 bta00010 - Glycolysis/ Gluconeogenesis +bosTau4 bta00020 bta00020 - Citratecycle (TCA cycle) +bosTau4 bta00030 bta00030 - Pentosephosphate pathway +bosTau4 bta00040 bta00040 - Pentoseand glucuronate interconversions +bosTau4 bta00051 bta00051 - Fructoseand mannose metabolism +bosTau4 bta00052 bta00052 - Galactosemetabolism +bosTau4 bta00053 bta00053 - Ascorbateand 
aldarate metabolism +bosTau4 bta00061 bta00061 - Fattyacid biosynthesis +bosTau4 bta00062 bta00062 - Fattyacid elongation +bosTau4 bta00071 bta00071 - Fattyacid metabolism +bosTau4 bta00072 bta00072 - Synthesisand degradation of ketone bodies +bosTau4 bta00100 bta00100 - Steroidbiosynthesis +bosTau4 bta00120 bta00120 - Primarybile acid biosynthesis +bosTau4 bta00130 bta00130 - Ubiquinoneand other terpenoid-quinone biosynthesis +bosTau4 bta00140 bta00140 - Steroidhormone biosynthesis +bosTau4 bta00190 bta00190 - Oxidativephosphorylation +bosTau4 bta00230 bta00230 - Purinemetabolism +bosTau4 bta00232 bta00232 - Caffeinemetabolism +bosTau4 bta00240 bta00240 - Pyrimidinemetabolism +bosTau4 bta00250 bta00250 - Alanine,aspartate and glutamate metabolism +bosTau4 bta00260 bta00260 - Glycine,serine and threonine metabolism +bosTau4 bta00270 bta00270 - Cysteineand methionine metabolism +bosTau4 bta00280 bta00280 - Valine,leucine and isoleucine degradation +bosTau4 bta00290 bta00290 - Valine,leucine and isoleucine biosynthesis +bosTau4 bta00300 bta00300 - Lysinebiosynthesis +bosTau4 bta00310 bta00310 - Lysinedegradation +bosTau4 bta00330 bta00330 - Arginineand proline metabolism +bosTau4 bta00340 bta00340 - Histidinemetabolism +bosTau4 bta00350 bta00350 - Tyrosinemetabolism +bosTau4 bta00360 bta00360 - Phenylalaninemetabolism +bosTau4 bta00380 bta00380 - Tryptophanmetabolism +bosTau4 bta00400 bta00400 - Phenylalanine,tyrosine and tryptophan biosynthesis +bosTau4 bta00410 bta00410 - beta-Alaninemetabolism +bosTau4 bta00430 bta00430 - Taurineand hypotaurine metabolism +bosTau4 bta00450 bta00450 - Selenocompoundmetabolism +bosTau4 bta00460 bta00460 - Cyanoaminoacid metabolism +bosTau4 bta00471 bta00471 - D-Glutamineand D-glutamate metabolism +bosTau4 bta00472 bta00472 - D-Arginineand D-ornithine metabolism +bosTau4 bta00480 bta00480 - Glutathionemetabolism +bosTau4 bta00500 bta00500 - Starchand sucrose metabolism +bosTau4 bta00510 bta00510 - N-Glycanbiosynthesis +bosTau4 
bta00511 bta00511 - Otherglycan degradation +bosTau4 bta00512 bta00512 - Mucintype O-Glycan biosynthesis +bosTau4 bta00514 bta00514 - Othertypes of O-glycan biosynthesis +bosTau4 bta00520 bta00520 - Aminosugar and nucleotide sugar metabolism +bosTau4 bta00524 bta00524 - Butirosinand neomycin biosynthesis +bosTau4 bta00531 bta00531 - Glycosaminoglycandegradation +bosTau4 bta00532 bta00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate +bosTau4 bta00533 bta00533 - Glycosaminoglycanbiosynthesis - keratan sulfate +bosTau4 bta00534 bta00534 - Glycosaminoglycanbiosynthesis - heparan sulfate +bosTau4 bta00561 bta00561 - Glycerolipidmetabolism +bosTau4 bta00562 bta00562 - Inositolphosphate metabolism +bosTau4 bta00563 bta00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis +bosTau4 bta00564 bta00564 - Glycerophospholipidmetabolism +bosTau4 bta00565 bta00565 - Etherlipid metabolism +bosTau4 bta00590 bta00590 - Arachidonicacid metabolism +bosTau4 bta00591 bta00591 - Linoleicacid metabolism +bosTau4 bta00592 bta00592 - alpha-Linolenicacid metabolism +bosTau4 bta00600 bta00600 - Sphingolipidmetabolism +bosTau4 bta00601 bta00601 - Glycosphingolipidbiosynthesis - lacto and neolacto series +bosTau4 bta00603 bta00603 - Glycosphingolipidbiosynthesis - globo series +bosTau4 bta00604 bta00604 - Glycosphingolipidbiosynthesis - ganglio series +bosTau4 bta00620 bta00620 - Pyruvatemetabolism +bosTau4 bta00630 bta00630 - Glyoxylateand dicarboxylate metabolism +bosTau4 bta00640 bta00640 - Propanoatemetabolism +bosTau4 bta00650 bta00650 - Butanoatemetabolism +bosTau4 bta00670 bta00670 - Onecarbon pool by folate +bosTau4 bta00730 bta00730 - Thiaminemetabolism +bosTau4 bta00740 bta00740 - Riboflavinmetabolism +bosTau4 bta00750 bta00750 - VitaminB6 metabolism +bosTau4 bta00760 bta00760 - Nicotinateand nicotinamide metabolism +bosTau4 bta00770 bta00770 - Pantothenateand CoA biosynthesis +bosTau4 bta00780 bta00780 - Biotinmetabolism +bosTau4 bta00785 bta00785 - Lipoicacid 
metabolism +bosTau4 bta00790 bta00790 - Folatebiosynthesis +bosTau4 bta00830 bta00830 - Retinolmetabolism +bosTau4 bta00860 bta00860 - Porphyrinand chlorophyll metabolism +bosTau4 bta00900 bta00900 - Terpenoidbackbone biosynthesis +bosTau4 bta00910 bta00910 - Nitrogenmetabolism +bosTau4 bta00920 bta00920 - Sulfurmetabolism +bosTau4 bta00970 bta00970 - Aminoacyl-tRNAbiosynthesis +bosTau4 bta00980 bta00980 - Metabolismof xenobiotics by cytochrome P450 +bosTau4 bta00982 bta00982 - Drugmetabolism - cytochrome P450 +bosTau4 bta00983 bta00983 - Drugmetabolism - other enzymes +bosTau4 bta01040 bta01040 - Biosynthesisof unsaturated fatty acids +bosTau4 bta01100 bta01100 - Metabolicpathways +bosTau4 bta02010 bta02010 - ABCtransporters +bosTau4 bta03008 bta03008 - Ribosomebiogenesis in eukaryotes +bosTau4 bta03010 bta03010 - Ribosome +bosTau4 bta03013 bta03013 - RNAtransport +bosTau4 bta03015 bta03015 - mRNAsurveillance pathway +bosTau4 bta03018 bta03018 - RNAdegradation +bosTau4 bta03020 bta03020 - RNApolymerase +bosTau4 bta03022 bta03022 - Basaltranscription factors +bosTau4 bta03030 bta03030 - DNAreplication +bosTau4 bta03040 bta03040 - Spliceosome +bosTau4 bta03050 bta03050 - Proteasome +bosTau4 bta03060 bta03060 - Proteinexport +bosTau4 bta03320 bta03320 - PPARsignaling pathway +bosTau4 bta03410 bta03410 - Baseexcision repair +bosTau4 bta03420 bta03420 - Nucleotideexcision repair +bosTau4 bta03430 bta03430 - Mismatchrepair +bosTau4 bta03440 bta03440 - Homologousrecombination +bosTau4 bta03450 bta03450 - Non-homologousend-joining +bosTau4 bta03460 bta03460 - Fanconianemia pathway +bosTau4 bta04010 bta04010 - MAPKsignaling pathway +bosTau4 bta04012 bta04012 - ErbBsignaling pathway +bosTau4 bta04020 bta04020 - Calciumsignaling pathway +bosTau4 bta04060 bta04060 - Cytokine-cytokinereceptor interaction +bosTau4 bta04062 bta04062 - Chemokinesignaling pathway +bosTau4 bta04070 bta04070 - Phosphatidylinositolsignaling system +bosTau4 bta04080 bta04080 - 
Neuroactiveligand-receptor interaction +bosTau4 bta04110 bta04110 - Cellcycle +bosTau4 bta04114 bta04114 - Oocytemeiosis +bosTau4 bta04115 bta04115 - p53signaling pathway +bosTau4 bta04120 bta04120 - Ubiquitinmediated proteolysis +bosTau4 bta04122 bta04122 - Sulfurrelay system +bosTau4 bta04130 bta04130 - SNAREinteractions in vesicular transport +bosTau4 bta04140 bta04140 - Regulationof autophagy +bosTau4 bta04141 bta04141 - Proteinprocessing in endoplasmic reticulum +bosTau4 bta04142 bta04142 - Lysosome +bosTau4 bta04144 bta04144 - Endocytosis +bosTau4 bta04145 bta04145 - Phagosome +bosTau4 bta04146 bta04146 - Peroxisome +bosTau4 bta04150 bta04150 - mTORsignaling pathway +bosTau4 bta04210 bta04210 - Apoptosis +bosTau4 bta04260 bta04260 - Cardiacmuscle contraction +bosTau4 bta04270 bta04270 - Vascularsmooth muscle contraction +bosTau4 bta04310 bta04310 - Wntsignaling pathway +bosTau4 bta04320 bta04320 - Dorso-ventralaxis formation +bosTau4 bta04330 bta04330 - Notchsignaling pathway +bosTau4 bta04340 bta04340 - Hedgehogsignaling pathway +bosTau4 bta04350 bta04350 - TGF-betasignaling pathway +bosTau4 bta04360 bta04360 - Axonguidance +bosTau4 bta04370 bta04370 - VEGFsignaling pathway +bosTau4 bta04380 bta04380 - Osteoclastdifferentiation +bosTau4 bta04510 bta04510 - Focaladhesion +bosTau4 bta04512 bta04512 - ECM-receptorinteraction +bosTau4 bta04514 bta04514 - Celladhesion molecules (CAMs) +bosTau4 bta04520 bta04520 - Adherensjunction +bosTau4 bta04530 bta04530 - Tightjunction +bosTau4 bta04540 bta04540 - Gapjunction +bosTau4 bta04610 bta04610 - Complementand coagulation cascades +bosTau4 bta04612 bta04612 - Antigenprocessing and presentation +bosTau4 bta04614 bta04614 - Renin-angiotensinsystem +bosTau4 bta04620 bta04620 - Toll-likereceptor signaling pathway +bosTau4 bta04621 bta04621 - NOD-likereceptor signaling pathway +bosTau4 bta04622 bta04622 - RIG-I-likereceptor signaling pathway +bosTau4 bta04623 bta04623 - CytosolicDNA-sensing pathway +bosTau4 bta04630 
bta04630 - Jak-STATsignaling pathway +bosTau4 bta04640 bta04640 - Hematopoieticcell lineage +bosTau4 bta04650 bta04650 - Naturalkiller cell mediated cytotoxicity +bosTau4 bta04660 bta04660 - Tcell receptor signaling pathway +bosTau4 bta04662 bta04662 - Bcell receptor signaling pathway +bosTau4 bta04664 bta04664 - Fcepsilon RI signaling pathway +bosTau4 bta04666 bta04666 - Fcgamma R-mediated phagocytosis +bosTau4 bta04670 bta04670 - Leukocytetransendothelial migration +bosTau4 bta04672 bta04672 - Intestinalimmune network for IgA production +bosTau4 bta04710 bta04710 - Circadianrhythm - mammal +bosTau4 bta04720 bta04720 - Long-termpotentiation +bosTau4 bta04721 bta04721 - Synapticvesicle cycle +bosTau4 bta04722 bta04722 - Neurotrophinsignaling pathway +bosTau4 bta04724 bta04724 - Glutamatergicsynapse +bosTau4 bta04725 bta04725 - Cholinergicsynapse +bosTau4 bta04727 bta04727 - GABAergicsynapse +bosTau4 bta04728 bta04728 - Dopaminergicsynapse +bosTau4 bta04730 bta04730 - Long-termdepression +bosTau4 bta04740 bta04740 - Olfactorytransduction +bosTau4 bta04742 bta04742 - Tastetransduction +bosTau4 bta04744 bta04744 - Phototransduction +bosTau4 bta04810 bta04810 - Regulationof actin cytoskeleton +bosTau4 bta04910 bta04910 - Insulinsignaling pathway +bosTau4 bta04912 bta04912 - GnRHsignaling pathway +bosTau4 bta04914 bta04914 - Progesterone-mediatedoocyte maturation +bosTau4 bta04916 bta04916 - Melanogenesis +bosTau4 bta04920 bta04920 - Adipocytokinesignaling pathway +bosTau4 bta04930 bta04930 - TypeII diabetes mellitus +bosTau4 bta04940 bta04940 - TypeI diabetes mellitus +bosTau4 bta04950 bta04950 - Maturityonset diabetes of the young +bosTau4 bta04960 bta04960 - Aldosterone-regulatedsodium reabsorption +bosTau4 bta04961 bta04961 - Endocrineand other factor-regulated calcium reabsorption +bosTau4 bta04962 bta04962 - Vasopressin-regulatedwater reabsorption +bosTau4 bta04964 bta04964 - Proximaltubule bicarbonate reclamation +bosTau4 bta04966 bta04966 - Collectingduct acid 
secretion +bosTau4 bta04970 bta04970 - Salivarysecretion +bosTau4 bta04971 bta04971 - Gastricacid secretion +bosTau4 bta04972 bta04972 - Pancreaticsecretion +bosTau4 bta04973 bta04973 - Carbohydratedigestion and absorption +bosTau4 bta04974 bta04974 - Proteindigestion and absorption +bosTau4 bta04975 bta04975 - Fatdigestion and absorption +bosTau4 bta04976 bta04976 - Bilesecretion +bosTau4 bta04977 bta04977 - Vitamindigestion and absorption +bosTau4 bta04978 bta04978 - Mineralabsorption +bosTau4 bta05010 bta05010 - Alzheimer'sdisease +bosTau4 bta05012 bta05012 - Parkinson'sdisease +bosTau4 bta05014 bta05014 - Amyotrophiclateral sclerosis (ALS) +bosTau4 bta05016 bta05016 - Huntington'sdisease +bosTau4 bta05020 bta05020 - Priondiseases +bosTau4 bta05100 bta05100 - Bacterialinvasion of epithelial cells +bosTau4 bta05132 bta05132 - Salmonellainfection +bosTau4 bta05133 bta05133 - Pertussis +bosTau4 bta05134 bta05134 - Legionellosis +bosTau4 bta05140 bta05140 - Leishmaniasis +bosTau4 bta05142 bta05142 - Chagasdisease (American trypanosomiasis) +bosTau4 bta05143 bta05143 - Africantrypanosomiasis +bosTau4 bta05144 bta05144 - Malaria +bosTau4 bta05145 bta05145 - Toxoplasmosis +bosTau4 bta05146 bta05146 - Amoebiasis +bosTau4 bta05150 bta05150 - Staphylococcusaureus infection +bosTau4 bta05152 bta05152 - Tuberculosis +bosTau4 bta05160 bta05160 - HepatitisC +bosTau4 bta05162 bta05162 - Measles +bosTau4 bta05164 bta05164 - InfluenzaA +bosTau4 bta05166 bta05166 - HTLV-Iinfection +bosTau4 bta05168 bta05168 - Herpessimplex infection +bosTau4 bta05200 bta05200 - Pathwaysin cancer +bosTau4 bta05202 bta05202 - Transcriptionalmisregulation in cancers +bosTau4 bta05210 bta05210 - Colorectalcancer +bosTau4 bta05211 bta05211 - Renalcell carcinoma +bosTau4 bta05212 bta05212 - Pancreaticcancer +bosTau4 bta05213 bta05213 - Endometrialcancer +bosTau4 bta05214 bta05214 - Glioma +bosTau4 bta05215 bta05215 - Prostatecancer +bosTau4 bta05216 bta05216 - Thyroidcancer +bosTau4 bta05217 bta05217 - 
Basalcell carcinoma +bosTau4 bta05218 bta05218 - Melanoma +bosTau4 bta05219 bta05219 - Bladdercancer +bosTau4 bta05220 bta05220 - Chronicmyeloid leukemia +bosTau4 bta05221 bta05221 - Acutemyeloid leukemia +bosTau4 bta05222 bta05222 - Smallcell lung cancer +bosTau4 bta05223 bta05223 - Non-smallcell lung cancer +bosTau4 bta05310 bta05310 - Asthma +bosTau4 bta05320 bta05320 - Autoimmunethyroid disease +bosTau4 bta05322 bta05322 - Systemiclupus erythematosus +bosTau4 bta05323 bta05323 - Rheumatoidarthritis +bosTau4 bta05330 bta05330 - Allograftrejection +bosTau4 bta05332 bta05332 - Graft-versus-hostdisease +bosTau4 bta05340 bta05340 - Primaryimmunodeficiency +bosTau4 bta05410 bta05410 - Hypertrophiccardiomyopathy (HCM) +bosTau4 bta05412 bta05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC) +bosTau4 bta05414 bta05414 - Dilatedcardiomyopathy +bosTau4 bta05416 bta05416 - Viralmyocarditis
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.primers.loc.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,6 @@ +#<species> <primers_file_path> +#aye-aye /galaxy/local_data/genome_diversity/primers/aye-aye_Galaxy_primers.txt +#bear /galaxy/local_data/genome_diversity/primers/bear_Galaxy_primers.txt +#bighorn /galaxy/local_data/genome_diversity/primers/bighorn_Galaxy_primers.txt +#tasmanian_devil /galaxy/local_data/genome_diversity/primers/devil_Galaxy_primers.txt +#tick /galaxy/local_data/genome_diversity/primers/tick_Galaxy_primers.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.rank.loc.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,4 @@ +#<species> <prefix> <kxml_dir_path> <path_to_dict_file> +#hg19 hsa /galaxy/local_data/genome_diversity/rank/KXML_hsa.d /galaxy/local_data/genome_diversity/rank/hsa_dict.txt +#canFam2 cfa /galaxy/local_data/genome_diversity/rank/KXML_cfa.d /galaxy/local_data/genome_diversity/rank/cfa_dict.txt +#bosTau4 bta /galaxy/local_data/genome_diversity/rank/KXML_bta.d /galaxy/local_data/genome_diversity/rank/bta_dict.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.ref_species.txt.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,4 @@ +# genome diversity species +cow cow +hg19 hg19 +dog dog
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.restriction_enzymes.txt.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,99 @@ +Acc65I - GGTACC Acc65I +AccB7I - CCANNNNNTGG AccB7I +AccI - GT(A/C)(G/T)AC AccI +AccIII - TCCGGA AccIII +AcyI - G(A/G)CG(C/T)C AcyI +AgeI - ACCGGT AgeI +AluI - AGCT AluI +Alw44I - GTGCAC Alw44I +ApaI - GGGCCC ApaI +AvaI - C(C/T)CG(A/G)G AvaI +AvaII - GG(A/T)CC AvaII +BalI - TGGCCA BalI +BamHI - GGATCC BamHI +BanI - GG(C/T)(A/G)CC BanI +BanII - G(A/G)GC(C/T)C BanII +BbuI - GCATGC BbuI +BclI - TGATCA BclI +BglI - GCCNNNNNGGC BglI +BglII - AGATCT BglII +BsaMI - GAATGC BsaMI +BsaOI - CG(A/G)(C/T)CG BsaOI +Bsp1286I - G(A/G/T)GC(A/C/T)C Bsp1286I +BsrBRI - GATNNNNATC BsrBRI +BsrSI - ACTGG BsrSI +BssHII - GCGCGC BssHII +Bst98I - CTTAAG Bst98I +BstEII - GGTNACC BstEII +BstOI - CC(A/T)GG BstOI +BstXI - CCANNNNNNTGG BstXI +BstZI - CGGCCG BstZI +Bsu36I - CCTNAGG Bsu36I +CfoI - GCGC CfoI +ClaI - ATCGAT ClaI +Csp45I - TTCGAA Csp45I +CspI - CGG(A/T)CCG CspI +DdeI - CTNAG DdeI +DpnI - GATC DpnI +DraI - TTTAAA DraI +EclHKI - GACNNNNNGTC EclHKI +Eco47III - AGCGCT Eco47III +Eco52I - CGGCCG Eco52I +Eco72I - CACGTG Eco72I +EcoRI - GAATTC EcoRI +EcoRV - GATATC EcoRV +HaeII - (A/G)GCGC(C/T) HaeII +HaeIII - GGCC HaeIII +HhaI - GCGC HhaI +HincII - GT(C/T)(A/G)AC HincII +HindIII - AAGCTT HindIII +HinfI - GANTC HinfI +HpaI - GTTAAC HpaI +HpaII - CCGG HpaII +Hsp92I - G(A/G)CG(C/T)C Hsp92I +Hsp92II - CATG Hsp92II +I-PpoI - TAACTATGACTCTCTTAAGGTAGCCAAAT I-PpoI +KpnI - GGTACC KpnI +MboI - GATC MboI +MluI - ACGCGT MluI +MspA1I - C(A/C)GC(G/T)G MspA1I +MspI - CCGG MspI +NaeI - GCCGGC NaeI +NarI - GGCGCC NarI +NciI - CC(C/G)GG NciI +NcoI - CCATGG NcoI +NdeI - CATATG NdeI +NgoMIV - GCCGGC NgoMIV +NheI - GCTAGC NheI +NotI - GCGGCCGC NotI +NruI - TCGCGA NruI +NsiI - ATGCAT NsiI +PstI - CTGCAG PstI +PvuI - CGATCG PvuI +PvuII - CAGCTG PvuII +RsaI - GTAC RsaI +SacI - GAGCTC SacI +SacII - CCGCGG SacII +SalI - GTCGAC SalI +Sau3AI - GATC Sau3AI +Sau96I - GGNCC 
Sau96I +ScaI - AGTACT ScaI +SfiI - GGCCNNNNNGGCC SfiI +SgfI - GCGATCGC SgfI +SinI - GG(A/T)CC SinI +SmaI - CCCGGG SmaI +SnaBI - TACGTA SnaBI +SpeI - ACTAGT SpeI +SphI - GCATGC SphI +SspI - AATATT SspI +StuI - AGGCCT StuI +StyI - CC(A/T)(A/T)GG StyI +TaqI - TCGA TaqI +Tru9I - TTAA Tru9I +Tth111I - GACNNNGTC Tth111I +VspI - ATTAAT VspI +XbaI - TCTAGA XbaI +XhoI - CTCGAG XhoI +XhoII - (A/G)GATC(C/T) XhoII +XmaI - CCCGGG XmaI +XmnI - GAANNNNTTC XmnI
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gd.snps.loc.sample Fri Sep 28 11:35:56 2012 -0400 @@ -0,0 +1,3 @@ +#<species> <SNP_call_file_path> +#bighorn /galaxy/local_data/genome_diversity/snps/bighorn_snps.txt +#tasmanian_devil /galaxy/local_data/genome_diversity/snps/devil_snps.txt