#! /usr/bin/env python
import sys
import random
import MySQLdb
from transcript import *
from transcriptList import *
from mySqlConnection import *
from mySqlTable import *
from mySqlTranscriptTable import *
from progress import *



class TranscriptListsComparator2(object):
  """
  Compare two transcript lists (a query and a reference), storing both in MySQL tables
  (one table per data set and per chromosome) and joining these tables to find overlaps
  @ivar transcriptListParsers:   parsers to the lists of query/reference transcripts
  @type transcriptListParsers:   list of 2 L{TranscriptListParser<TranscriptListParser>}
  @ivar transcriptOriginalBases: database for the original transcripts (for query/reference, for each chromosome)
  @type transcriptOriginalBases: list of 2 dict of chromosomes to L{MySqlTranscriptTable<MySqlTranscriptTable>}
  @ivar transcriptWorkingBases:  database for the modified working transcripts (for query/reference, for each chromosome)
  @type transcriptWorkingBases:  list of 2 dict of chromosomes to L{MySqlTranscriptTable<MySqlTranscriptTable>}
  @ivar mySqlConnection:         connection to a MySQL database (to compute the overlapping efficiently)
  @type mySqlConnection:         class L{MySqlConnection<MySqlConnection>}
  @ivar introns:                 compare transcripts (introns included) or exons only
  @type introns:                 list of 2 boolean
  @ivar starts:                  restrict the transcripts to first nucleotides
  @type starts:                  list of 2 int or None
  @ivar fivePrimes:              extend a list of transcripts by their 5' end
  @type fivePrimes:              list of 2 int or None
  @ivar threePrimes:             extend a list of transcripts by their 3' end
  @type threePrimes:             list of 2 int or None
  @ivar distance:                distance between two transcripts [default: 0] (stored, but not used by L{join} yet)
  @type distance:                int
  @ivar colinear:                whether transcripts should overlap in the same direction
  @type colinear:                boolean
  @ivar antisense:               whether transcripts should overlap in the opposite direction
  @type antisense:               boolean
  @cvar QUERY:                   constant specifying the query objects
  @type QUERY:                   int
  @cvar REFERENCE:               constant specifying the reference objects
  @type REFERENCE:               int
  @cvar TYPES:                   set of types of data (query or reference) objects
  @type TYPES:                   tuple of 2 int
  @ivar nbTranscripts:           number of transcript in the query/reference set
  @type nbTranscripts:           list of 2 int or None
  @ivar nbNucleotides:           number of nucleotides in the query/reference set
  @type nbNucleotides:           list of 2 int or None
  @ivar transcriptJoins:         join with transcripts of query/reference set
  @type transcriptJoins:         list of 2 boolean
  @ivar exonJoins:               join with exons of query/reference set
  @type exonJoins:               list of 2 boolean
  @ivar uniqueOutput:            output unique results (SELECT DISTINCT)
  @type uniqueOutput:            boolean
  @ivar logHandle:               log handle
  @type logHandle:               file
  @ivar verbosity:               verbosity
  @type verbosity:               int
  """

  # Indices of the query/reference data in every 2-element list of this class.
  # Defined on the class, so they are readable both from the class and from instances.
  QUERY     = 0
  REFERENCE = 1
  TYPES     = (QUERY, REFERENCE)


  def __init__(self, logHandle = None, verbosity = 0):
    """
    Constructor; opens the MySQL connection (with hard-coded connection parameters)
    @param logHandle: log handle
    @type  logHandle: file
    @param verbosity: verbosity
    @type  verbosity: int
    """
    self.logHandle               = logHandle
    self.verbosity               = verbosity
    self.mySqlConnection         = MySqlConnection("localhost", "mzytnick", "", "bioinfo", self.verbosity)
    self.transcriptListParsers   = [None, None]
    self.transcriptOriginalBases = [{}, {}]
    self.transcriptWorkingBases  = [{}, {}]
    self.introns                 = [False, False]
    self.starts                  = [None, None]
    self.fivePrimes              = [None, None]
    self.threePrimes             = [None, None]
    self.distance                = 0
    self.colinear                = False
    self.antisense               = False
    self.nbTranscripts           = [None, None]
    self.nbNucleotides           = [None, None]
    self.uniqueOutput            = True
    self.transcriptJoins         = [True, False]
    self.exonJoins               = [False, False]


  def _createTable(self, chromosome):
    """
    Create a transcript table handle for a chromosome
    @param chromosome: name of the chromosome
    @type  chromosome: string
    @return:           a new table, named after the chromosome and a random number
    """
    # NOTE: the suffix is drawn in [0, 100000] and nothing here checks that the name is not already used
    tableName = "%s_%d" % (chromosome, random.randint(0, 100000))
    return MySqlTranscriptTable(tableName, self.mySqlConnection, self.verbosity)


  def setList(self, type, transcriptListParser):
    """
    Set a transcript list: scan the parser, store every transcript in a per-chromosome
    "original" table, then copy each original table to a "working" table
    @param type:                 whether use query/reference parser
    @type  type:                 int
    @param transcriptListParser: parser to the list of query/reference transcripts
    @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
    """
    self.transcriptListParsers[type] = transcriptListParser

    if self.verbosity > 0:
      print("Scanning file '%s'..." % (transcriptListParser.fileName))
    transcriptListParser.getInfos()
    if self.verbosity > 0:
      print("... done")

    self.nbTranscripts[type] = transcriptListParser.getNbTranscripts()
    self.nbNucleotides[type] = transcriptListParser.getSize()

    # the parser has been read once to get the statistics: rewind it before storing
    transcriptListParser.reset()
    originalBases = self.transcriptOriginalBases[type]
    progress      = Progress(transcriptListParser.getNbTranscripts(), "Storing from %s" % (transcriptListParser.fileName), self.verbosity)
    for transcript in transcriptListParser.getIterator():
      chromosome = transcript.chromosome
      if chromosome not in originalBases:
        originalBases[chromosome] = self._createTable(chromosome)
      originalBases[chromosome].addTranscript(transcript)
      progress.inc()
    progress.done()

    # the working copies are the ones that modifyTranscriptList alters and join reads
    workingBases = self.transcriptWorkingBases[type]
    for chromosome in originalBases:
      workingBases[chromosome] = self._createTable(chromosome)
      workingBases[chromosome].copy(originalBases[chromosome])


  def acceptIntron(self, type, bool):
    """
    Compare transcripts or exons only
    @param type: whether use query/reference data
    @type  type: int
    @param bool: include introns or not
    @type  bool: boolean
    """
    self.introns[type] = bool


  def restrictToStart(self, type, size):
    """
    Restrict a list of transcripts to first nucleotides (this also disables the introns for this list)
    @param type: whether use query/reference data
    @type  type: int
    @param size: the size of the transcript to be considered
    @type  size: int
    """
    self.starts[type]  = size
    self.introns[type] = False


  def extendFivePrime(self, type, size):
    """
    Extend a list of transcripts by their 5' end
    @param type: whether use query/reference data
    @type  type: int
    @param size: size of the extension
    @type  size: int
    """
    self.fivePrimes[type] = size


  def extendThreePrime(self, type, size):
    """
    Extend a list of transcripts by their 3' end
    @param type: whether use query/reference data
    @type  type: int
    @param size: size of the extension
    @type  size: int
    """
    # 'type' used to be missing from the signature: the body then indexed the list with the
    # builtin 'type' and always raised a TypeError
    self.threePrimes[type] = size


  def setDistance(self, distance):
    """
    Set the distance between two transcripts
    @param distance: distance
    @type  distance: int
    """
    self.distance = distance


  def getColinearOnly(self, boolean):
    """
    Only consider transcripts that overlap in the same direction
    @param boolean: whether transcripts should overlap in the same direction
    @type  boolean: boolean
    """
    self.colinear = boolean


  def getAntisenseOnly(self, boolean):
    """
    Only consider transcripts that overlap in the opposite direction
    @param boolean: whether transcripts should overlap in the opposite direction
    @type  boolean: boolean
    """
    self.antisense = boolean


  def getUniqueOutput(self, boolean):
    """
    Get transcript output once
    @param boolean: whether each result should be output only once (SELECT DISTINCT)
    @type  boolean: boolean
    """
    # this setter used to overwrite self.antisense
    self.uniqueOutput = boolean


  def getTranscriptJoin(self, type, boolean):
    """
    Join with transcripts for query/reference set
    @param type:    whether use query/reference data
    @type  type:    int
    @param boolean: whether query/reference transcripts should be joined too
    @type  boolean: boolean
    """
    self.transcriptJoins[type] = boolean


  def getExonJoin(self, type, boolean):
    """
    Join with exons for query/reference set
    @param type:    whether use query/reference data
    @type  type:    int
    @param boolean: whether query/reference exons should be joined too
    @type  boolean: boolean
    """
    self.exonJoins[type] = boolean


  def removeTables(self):
    """
    Remove the temporary MySQL tables (original and working, for every chromosome)
    """
    for type in self.TYPES:
      for bases in (self.transcriptOriginalBases[type], self.transcriptWorkingBases[type]):
        # the bases are dictionaries of tables: 'remove' must be called on each table
        for table in bases.values():
          table.remove()
        # forget the removed tables, so that they are not used any longer
        bases.clear()


  def clearTables(self):
    """
    Empty the content of the databases (original and working, for every chromosome)
    """
    for type in self.TYPES:
      for bases in (self.transcriptOriginalBases[type], self.transcriptWorkingBases[type]):
        # calling 'clear' on the dictionary itself would only drop the references to the tables
        # NOTE(review): assumes that the tables provide a 'clear' method, as the former code suggests
        for table in bases.values():
          table.clear()


  def modifyTranscriptList(self, type):
    """
    Apply the requested modifications (restriction to start, 5' and 3' extensions)
    to the working tables of a transcript list
    @param type: whether use query/reference data
    @type  type: int
    """
    # (size, command for direction = 1, command for direction = -1), applied in this order
    modifications = (
      (self.starts[type],      "UPDATE %s SET end = start + %d WHERE direction = 1", "UPDATE %s SET start = end - %d WHERE direction = -1"),
      (self.fivePrimes[type],  "UPDATE %s SET start = start - %d WHERE direction = 1", "UPDATE %s SET end = end + %d WHERE direction = -1"),
      (self.threePrimes[type], "UPDATE %s SET end = end + %d WHERE direction = 1", "UPDATE %s SET start = start - %d WHERE direction = -1"),
    )
    for size, forwardCommand, reverseCommand in modifications:
      if size is None:
        continue
      for chromosome in self.transcriptWorkingBases[type]:
        tableName = self.transcriptWorkingBases[type][chromosome].name
        self.mySqlConnection.executeQuery(forwardCommand % (tableName, size))
        self.mySqlConnection.executeQuery(reverseCommand % (tableName, size))


  def _getSharedChromosomes(self):
    """
    Get the chromosomes which have a working table in both sets
    @return: the set of chromosomes present in the query and in the reference sets
    """
    return set(self.transcriptWorkingBases[self.QUERY].keys()) & set(self.transcriptWorkingBases[self.REFERENCE].keys())


  def _buildJoinCommand(self, chromosome):
    """
    Build the SQL command which joins the query and reference tables of a chromosome
    @param chromosome: name of the chromosome
    @type  chromosome: string
    @return:           the SQL command
    @raise ValueError: if neither transcripts nor exons are selected for output
    """
    comparedNames   = []   # tables whose coordinates are compared (query, then reference)
    selectedColumns = []   # what is output
    extraTables     = []   # tables which are output, but not compared
    joinConditions  = []   # links between the exons tables and their transcripts tables
    for type in self.TYPES:
      transcriptsName = self.transcriptWorkingBases[type][chromosome].name
      exonsName       = self.transcriptWorkingBases[type][chromosome].exonsTable.name
      if self.transcriptJoins[type]:
        selectedColumns.append("%s.*" % (transcriptsName))
      if self.exonJoins[type]:
        selectedColumns.append("%s.*" % (exonsName))
      # with introns, the transcripts are compared and the exons may be added; without, the reverse
      if self.introns[type]:
        comparedNames.append(transcriptsName)
        otherName, otherNeeded = exonsName, self.exonJoins[type]
      else:
        comparedNames.append(exonsName)
        otherName, otherNeeded = transcriptsName, self.transcriptJoins[type]
      if otherNeeded:
        extraTables.append(otherName)
        joinConditions.append("%s.transcriptId = %s.id" % (exonsName, transcriptsName))

    if not selectedColumns:
      raise ValueError("Nothing to select: no transcript nor exon join is set")
    queryName, referenceName = comparedNames

    selectCommand = "SELECT"
    if self.uniqueOutput:
      selectCommand += " DISTINCT"
    # the selected columns must be separated by commas
    selectCommand += " " + ", ".join(selectedColumns)

    fromCommand = "FROM " + ", ".join(comparedNames + extraTables)

    conditions = [
      "%s.chromosome LIKE %s.chromosome" % (queryName, referenceName),
      "%s.start <= %s.end" % (queryName, referenceName),
      "%s.end >= %s.start" % (queryName, referenceName),
    ]
    if self.colinear:
      conditions.append("%s.direction = %s.direction" % (queryName, referenceName))
    if self.antisense:
      conditions.append("%s.direction != %s.direction" % (queryName, referenceName))
    whereCommand = "WHERE " + " AND ".join(conditions + joinConditions)

    return "%s %s %s" % (selectCommand, fromCommand, whereCommand)


  def join(self, chromosome = None):
    """
    Join the query and reference tables of one chromosome
    @param chromosome: the chromosome to join; if None, the first (in sorted order) of the
                       chromosomes shared by the two sets is used
    @type  chromosome: string or None
    @return:           the result of the query, or None if no chromosome is given and the
                       two sets share no chromosome
    @raise KeyError:   if the given chromosome has no table in one of the sets
    """
    if chromosome is None:
      # a single result is returned: use L{compareTranscriptList}, or call this method
      # once per chromosome, to cover every shared chromosome
      sharedChromosomes = sorted(self._getSharedChromosomes())
      if not sharedChromosomes:
        return None
      chromosome = sharedChromosomes[0]

    command = self._buildJoinCommand(chromosome)
    if self.verbosity > 0:
      print("Joining %s..." % (chromosome))
    query = self.mySqlConnection.executeQuery(command)
    if self.verbosity > 0:
      print("... done.")
    return query


  def compareTranscriptList(self):
    """
    Compare a list of transcript to the reference one: modify the working tables,
    join them for every shared chromosome, and print every line of the results
    @return: the transcripts that overlap with the reference set (nothing is added to this list yet)
    """
    outputTranscriptList = TranscriptList()
    nbTranscripts        = 0

    for type in self.TYPES:
      self.modifyTranscriptList(type)
    for chromosome in sorted(self._getSharedChromosomes()):
      query = self.join(chromosome)
      for line in query.getIterator():
        # find those transcripts that actually overlap
        # TODO: the lines are only printed: neither nbTranscripts nor outputTranscriptList is updated
        print(line)

    # the sizes are None until setList is called, and the set may be empty: avoid failing on them
    nbQueryTranscripts = self.nbTranscripts[self.QUERY] or 0
    nbQueryNucleotides = self.nbNucleotides[self.QUERY] or 0
    percentage         = 0.0
    if nbQueryTranscripts:
      percentage = nbTranscripts / float(nbQueryTranscripts) * 100
    # NOTE(review): the line labelled "reference" reports the sizes of the query set -- confirm the label
    print("reference:   %d sequences, %d nucleotides" % (nbQueryTranscripts, nbQueryNucleotides))
    print("transcripts: %d sequences (%f%%)" % (nbTranscripts, percentage))

    return outputTranscriptList
