import re
import sys
from structure.interval import *
from parsing.transcriptListParser import *


class GbParser(TranscriptListParser):
  """Parser that reads a GBrowse file and builds a transcript list from it.

  The parts of the format this class recognizes are:
    - header lines, up to the first blank line, among which an optional
      ``bgcolor = <value>`` line is remembered in ``self.color``;
    - ``reference = <name>`` lines, which set the chromosome used for the
      records that follow;
    - ``READS <name> <start>-<end>[,<start>-<end>...] "<comment>"`` lines,
      each of which yields one transcript.
  """


  def __init__(self, fileName, verbosity = 0):
    """Create the parser.

    fileName and verbosity are forwarded unchanged to the base class.
    """
    # Initialized before delegating to the base class; presumably the base
    # initializer may already read the header -- TODO confirm.
    self.reference = None
    self.color     = None
    super(GbParser, self).__init__(fileName, verbosity)


  def __del__(self):
    """Delegate clean-up to the base class."""
    super(GbParser, self).__del__()


  @staticmethod
  def getFileFormats():
    """Return the list of format names handled by this parser."""
    return ["gb", "gbrowse"]


  def skipFirstLines(self):
    """Consume the header, i.e. every line up to the first blank line.

    A ``bgcolor = <value>`` line found on the way is stored in
    ``self.color``. ``self.currentLineNb`` is incremented for every line
    that is read, including the terminating blank one.
    """
    for line in self.handle:
      self.currentLineNb += 1
      line = line.strip()
      m = re.search(r"^\s*bgcolor\s*=\s*(\S+)\s*$", line)
      if m is not None:
        self.color = m.group(1)
      if line == "":
        return


  def parseLine(self, line):
    """Parse one record and return the corresponding transcript.

    If the line is a ``reference = <name>`` line, the reference is stored
    and the next line of ``self.handle`` is read and used as the READS line.
    The process exits (``sys.exit``) when the READS line or one of its exons
    is malformed, or when no reference has been seen yet.
    """
    transcript = Transcript()
    # optional first line (reference)
    m = re.search(r"^\s*reference\s*=\s*(\S+)\s*$", line)
    if m is not None:
      self.reference = m.group(1)
      # Read exactly one more line; if the file ends here, the reference
      # line itself falls through and is rejected by the format check below.
      nextLine = next(iter(self.handle), None)
      if nextLine is not None:
        line = nextLine.strip()
        self.currentLineNb += 1
    # READS line (name, genomic coordinates, comment)
    m = re.search(r"^\s*READS\s+(\S+)\s+(\S+)\s+\"([^\"]*)\"\s*$", line)
    if m is None:
      sys.exit("\nLine %d '%s' does not have a GBrowse format" % (self.currentLineNb, line))
    if self.reference is None:
      sys.exit("Cannot get reference of GBrowse line %d '%s'" % (self.currentLineNb, line))
    transcript.setChromosome(self.reference)
    transcript.setName(m.group(1))
    transcript.setComment(m.group(3))
    # exons
    starts    = []
    ends      = []
    # Sum of (second - first) over all exons; its value is handed to
    # setDirection() as is.
    direction = 0
    for exon in m.group(2).split(","):
      mExon = re.search(r"^(\d+)-(\d+)$", exon)
      if mExon is None:
        sys.exit("\nCannot read GBrowse exon line %d '%s'" % (self.currentLineNb, exon))
      first  = int(mExon.group(1))
      second = int(mExon.group(2))
      direction += second - first
      # Coordinates may be written in either order; store them sorted.
      start = min(first, second)
      end   = max(first, second)
      interval = Interval()
      interval.setChromosome(transcript.chromosome)
      interval.setStart(start)
      interval.setEnd(end)
      transcript.addExon(interval)
      starts.append(start)
      ends.append(end)
    # The READS pattern guarantees at least one exon, so both lists are
    # non-empty; taking min/max of the real values avoids relying on a
    # hard-coded upper bound for genomic coordinates.
    transcript.setStart(min(starts))
    transcript.setEnd(max(ends))
    transcript.setDirection(direction)
    for exon in transcript.getExons():
      exon.setDirection(direction)
    return transcript

