import re
import sys
from parser.mapperParser import *


class ExoParser(MapperParser):
  """A class that parses the output of Exonerate - roll your own format

  Each data line is expected to hold, separated by whitespace: the query
  name, two query coordinates, a query strand, the target name, two target
  coordinates, a target strand, one integer column that is skipped, the
  number of mismatches, and then a series of "<letter> <query length>
  <target length>" triplets describing the alignment.
  """

  # Line on which parseLine() returns None instead of a mapping.
  _END_OF_ANALYSIS = "-- completed exonerate analysis"

  # One full RYO line; the first strand and the integer before the mismatch
  # count are matched but not captured.
  _LINE_PATTERN = re.compile(r"^\s*(\S+)\s+(\d+)\s+(\d+)\s+[+-]\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+\d+\s+(\d+)\s+(\S.*)$")

  # One "<letter> <query length> <target length>" triplet at the start of the
  # remaining alignment description.
  _OPERATION_PATTERN = re.compile(r"^(\w)\s+(\d+)\s+(\d+)")


  def __init__(self, fileName, verbosity = 0):
    """Forward fileName and verbosity unchanged to the parent constructor."""
    super(ExoParser, self).__init__(fileName, verbosity)


  def __del__(self):
    """Delegate clean-up to the parent class."""
    super(ExoParser, self).__del__()


  def skipFirstLines(self):
    """Consume lines from self.handle up to and including the first one
    containing "Hostname".

    If the end of the file is reached before such a line is found, the
    method returns with the handle positioned at end of file.
    """
    while True:
      line = self.handle.readline()
      # readline() returns an empty string at end of file; without this check
      # a file lacking the "Hostname" line would make the loop spin forever.
      if not line or "Hostname" in line:
        return


  def _closeSubMapping(self, mapping, subMapping, queryEnd, targetEnd):
    """Set the end coordinates of subMapping and append it to mapping."""
    subMapping.queryInterval.setEnd(queryEnd)
    subMapping.targetInterval.setEnd(targetEnd)
    mapping.addSubMapping(subMapping)


  def parseLine(self, line):
    """Build a Mapping from one RYO line.

    line: the raw text of the line.

    Returns the populated Mapping, or None when line is the
    "-- completed exonerate analysis" sentinel (with or without a trailing
    line terminator).

    Exits the program through sys.exit() when the line does not match the
    expected layout or when an alignment triplet starts with a letter other
    than M, G, I, 5 or 3.
    """
    # Tolerate a missing final newline or a "\r\n" terminator on the sentinel.
    if line.rstrip("\r\n") == self._END_OF_ANALYSIS:
      return None

    m = self._LINE_PATTERN.search(line)
    if m is None:
      sys.exit("Line '%s' does not have the RYO format" % (line))

    mapping      = Mapping()
    name         = m.group(1)
    queryBounds  = (int(m.group(2)), int(m.group(3)))
    chromosome   = m.group(4)
    targetBounds = (int(m.group(5)), int(m.group(6)))
    direction    = m.group(7)
    nbMismatches = int(m.group(8))
    rest         = m.group(9).strip()

    # Coordinates are reordered so that start <= end, and one is subtracted
    # from the larger value (presumably turning an exclusive end into an
    # inclusive one -- TODO confirm against the Exonerate output convention).
    queryStart   = min(queryBounds)
    queryEnd     = max(queryBounds) - 1
    targetStart  = min(targetBounds)
    targetEnd    = max(targetBounds) - 1

    nbGaps       = 0
    queryOffset  = 0
    targetOffset = 0

    # subMapping is the currently open aligned part, or None when no part is
    # open.  An "M" triplet opens one, "I"/"5"/"3" close it, "G" only counts
    # gaps and leaves it open.
    subMapping = None
    m = self._OPERATION_PATTERN.search(rest)
    while m is not None:
      sign           = m.group(1)
      queryDistance  = int(m.group(2))
      targetDistance = int(m.group(3))

      if sign == "M":
        if subMapping is None:
          subMapping = SubMapping()

          # NOTE(review): the size comes from the first "M" triplet only;
          # later "M" triplets of the same sub-mapping do not update it --
          # confirm this is intended.
          subMapping.setSize(queryDistance)
          subMapping.setDirection(direction)

          subMapping.queryInterval.setName(name)
          subMapping.queryInterval.setStart(queryStart + queryOffset)
          subMapping.queryInterval.setDirection(direction)

          # The target interval direction is always 1, whatever the strand
          # read from the line.
          subMapping.targetInterval.setChromosome(chromosome)
          subMapping.targetInterval.setStart(targetStart + targetOffset)
          subMapping.targetInterval.setDirection(1)

      elif sign == "G":
        nbGaps += max(queryDistance, targetDistance)

      elif sign in ("I", "5", "3"):
        if subMapping is not None:
          # The open part ends just before the current triplet.
          self._closeSubMapping(mapping, subMapping, queryStart + queryOffset - 1, targetStart + targetOffset - 1)
          subMapping = None

      else:
        sys.exit("Cannot understand sign '%s' in line %s" % (m.group(1), line))

      queryOffset  += queryDistance
      targetOffset += targetDistance
      # NOTE(review): text left over once no further triplet matches is
      # silently ignored.
      rest = rest[m.end():].strip()
      m = self._OPERATION_PATTERN.search(rest)

    # A part still open after the last triplet ends at the accumulated offset.
    if subMapping is not None:
      self._closeSubMapping(mapping, subMapping, queryStart + queryOffset - 1, targetStart + targetOffset - 1)

    mapping.setNbMismatches(nbMismatches)
    mapping.setNbGaps(nbGaps)
    mapping.setDirection(direction)

    mapping.queryInterval.setName(name)
    mapping.queryInterval.setStart(queryStart)
    mapping.queryInterval.setEnd(queryEnd)

    mapping.targetInterval.setChromosome(chromosome)
    mapping.targetInterval.setStart(targetStart)
    mapping.targetInterval.setEnd(targetEnd)

    return mapping

