Mercurial > repos > yufei-luo > s_mart
view commons/core/coord/Align.py @ 62:8c42a6d7ffd4
Added simple test BED file.
author | m-zytnicki |
---|---|
date | Mon, 19 Oct 2015 11:25:11 +0200 |
parents | 769e306b7933 |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique) # http://www.inra.fr # http://urgi.versailles.inra.fr # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. import time from commons.core.coord.Range import Range from commons.core.coord.Map import Map ## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) # class Align( object ): ## Constructor # # @param range_q: a Range instance for the query # @param range_s: a Range instance for the subject # @param e_value: E-value of the match # @param identity: identity percentage of the match # @param score: score of the match # def __init__(self, range_q=Range(), range_s=Range(), e_value=0, score=0, identity=0): self.range_query = range_q self.range_subject = range_s self.e_value = float(e_value) self.score = float(score) self.identity = float(identity) ## Return True if the instance is empty, False otherwise # def isEmpty(self): return self.range_query.isEmpty() or self.range_subject.isEmpty() ## Equal operator # def __eq__(self, o): if self.range_query==o.range_query and self.range_subject==o.range_subject and \ self.e_value==o.e_value and self.score==o.score and self.identity==o.identity: return True return False ## Unequal operator # # @param o a Range instance # def __ne__(self, o): return not self.__eq__(o) ## Convert the object into a string # # @note used in 'print myObject' # def __str__( self ): return self.toString() ## Read attributes from an Align file # # @param fileHandler: file handler of the file being read # @return: 1 on success, 0 at the end of the file # def read(self, fileHandler): self.reset() line = fileHandler.readline() if line == "": return 0 tokens = line.split("\t") if len(tokens) < len(self.__dict__.keys()): return 0 self.setFromTuple(tokens) return 1 ## Set attributes from tuple # # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity) # @note data are loaded such that the query is always on the direct strand # def setFromTuple( self, tuple ): #TODO: we need to create Range instances because of __eq__() and isEmpty() tests, but WHY ??? self.range_query = Range() self.range_subject = Range() if int(tuple[1]) < int(tuple[2]): self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) ) self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) ) else: self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) ) self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) ) self.e_value = float(tuple[6]) self.score = float(tuple[7]) self.identity = float(tuple[8]) ## Reset # def reset( self ): self.range_query.reset() self.range_subject.reset() self.e_value = 0 self.score = 0 self.identity = 0 ## Return the attributes as a formatted string # def toString(self): string = "%s" % ( self.range_query.toString() ) string += "\t%s" % ( self.range_subject.toString() ) string += "\t%g\t%i\t%f" % ( self.e_value, self.score, self.identity ) return string ## Return the attributes as a GFF-formatted string # def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ): if not self.isSubjectOnDirectStrand(): self.reverse() string = "%s" % ( self.getQueryName() ) string += "\t%s" % ( source ) string += "\t%s" % ( type ) string += "\t%s" % ( self.getQueryMin() ) string += "\t%s" % ( self.getQueryMax() ) string += "\t%g" % ( self.e_value ) string += "\t%s" % ( self.getQueryStrand() ) string += "\t%s" % ( phase ) attributes = "" if ID != "": attributes += "ID=%s" % ( ID ) else: attributes += "ID=%i" % ( str(time.time())[-8:-1].replace(".","") ) if Parent != "": attributes += ";Parent=%s" % ( Parent ) attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() ) string += "\t%s" % ( attributes ) return string ## Reverse query and subject # def reverse(self): self.range_query.reverse() self.range_subject.reverse() ## Show the attributes # def show(self): print self.toString() ## Write attributes into an Align file # # @param fileHandler: file handler of the file being filled # def write(self, fileHandler): fileHandler.write("%s\n" % (self.toString())) ## Save attributes into an Align file # # @param file: name of the file being filled # def save(self, file): fileHandler = open( file, "a" ) self.write( fileHandler ) fileHandler.close() ## Return the score # def getScore(self): return self.score ## Return the identity # def getIdentity(self): return self.identity def getEvalue(self): return self.e_value ## Return the length on the query # def getLengthOnQuery(self): return self.range_query.getLength() ## Return the name of the query # def getQueryName( self ): return self.range_query.seqname ## Return the start of the query # def getQueryStart( self ): return self.range_query.start ## Return the end of the query # def getQueryEnd( self ): return self.range_query.end ## Return the min of the query # def getQueryMin( self ): return self.range_query.getMin() ## Return the max of the query # def getQueryMax( self ): return self.range_query.getMax() ## Return the strand of the query # def getQueryStrand( self ): return self.range_query.getStrand() ## Return the length on the subject # def getLengthOnSubject(self): return self.range_subject.getLength() ## Return the name of the subject # def getSubjectName( self ): return self.range_subject.seqname ## Return the start of the subject # def getSubjectStart( self ): return self.range_subject.start ## Return the end of the subject # def getSubjectEnd( self ): return self.range_subject.end ## Return the min of the subject # def getSubjectMin( self ): return self.range_subject.getMin() ## Return the max of the subject # def getSubjectMax( self ): return self.range_subject.getMax() ## Return the strand of the subject # def getSubjectStrand( self ): return self.range_subject.getStrand() ## Return the query as a Range instance # def getQueryAsRange( self ): return self.range_query ## Return the subject as a Range instance # def getSubjectAsRange( self ): return self.range_subject ## Set the name of the query # def setQueryName( self, name ): self.range_query.seqname = name ## Set the start of the query # def setQueryStart( self, start ): self.range_query.start = start ## Set the end of the query # def setQueryEnd( self, end ): self.range_query.end = end ## Set the name of the subject # def setSubjectName( self, name ): self.range_subject.seqname = name ## Set the start of the subject # def setSubjectStart( self, start ): self.range_subject.start = start ## Set the end of the subject # def setSubjectEnd( self, end ): self.range_subject.end = end ## Merge the instance with another Align instance # # @param o an Align instance # def merge(self, o): if self.range_query.seqname != o.range_query.seqname \ or self.range_subject.seqname != o.range_subject.seqname: return self.range_query.merge(o.range_query) self.range_subject.merge(o.range_subject) self.score = max(self.score,o.score) self.e_value = min(self.e_value,o.e_value) self.identity = max(self.identity,o.identity) ## Return a Map instance with the subject mapped on the query # def getSubjectAsMapOfQuery(self): iMap = Map() iMap.name = self.range_subject.seqname iMap.seqname = self.range_query.seqname if self.range_subject.isOnDirectStrand(): iMap.start = self.range_query.start iMap.end = self.range_query.end else: iMap.start = self.range_query.end iMap.end = self.range_query.start return iMap ## Return True if query is on direct strand # def isQueryOnDirectStrand( self ): return self.range_query.isOnDirectStrand() ## Return True if subject is on direct strand # def isSubjectOnDirectStrand( self ): return self.range_subject.isOnDirectStrand() ## Return True if query and subject are on the same strand, False otherwise # def areQrySbjOnSameStrand(self): return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand() ## Return False if query and subject are on the same strand, True otherwise # def areQrySbjOnOppositeStrands(self): return not self.areQrySbjOnSameStrand() ## Set attributes from string # # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity # @param sep field separator # def setFromString(self, string, sep="\t"): if string[-1] == "\n": string = string[:-1] self.setFromTuple( string.split(sep) ) ## Return a first Map instance for the query and a second for the subject # def getMapsOfQueryAndSubject(self): iMapQuery = Map( name="repet", seqname=self.range_query.seqname, start=self.range_query.start, end=self.range_query.end ) iMapSubject = Map( name="repet", seqname=self.range_subject.seqname, start=self.range_subject.start, end=self.range_subject.end ) return iMapQuery, iMapSubject ## Write query coordinates as Map in a file # # @param fileHandler: file handler of the file being filled # def writeSubjectAsMapOfQuery( self, fileHandler ): m = self.getSubjectAsMapOfQuery() m.write( fileHandler ) ## Return a bin for fast database access # def getBin(self): return self.range_query.getBin() ## Switch query and subject # def switchQuerySubject( self ): tmpRange = self.range_query self.range_query = self.range_subject self.range_subject = tmpRange if not self.isQueryOnDirectStrand(): self.reverse() ## Return True if the query overlaps with the query of another Align instance, False otherwise # def isQueryOverlapping( self, iAlign ): return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() ) ## Return True if the subject overlaps with the subject of another Align instance, False otherwise # def isSubjectOverlapping( self, iAlign ): return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() ) ## Return True if the Align instance overlaps with another Align instance, False otherwise # def isOverlapping( self, iAlign ): if self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ): return True else: return False ## Update the score # # @note the new score is the length on the query times the percentage of identity # def updateScore( self ): newScore = self.getLengthOnQuery() * self.getIdentity() / 100.0 self.score = newScore