#! /usr/bin/env python
"""
Merge sliding windows of two different clusterings
"""

import os
import sys
from optparse import OptionParser

from structure.transcriptContainer import *
from writer.transcriptWriter import *
from misc.rPlotter import *
from misc.progress import *


def addRegion(regions, chromosome, direction, start, end, tags):
  """
  Store a region read from the first input file.

  regions is the nested dictionary chromosome -> direction -> start -> end -> tags;
  it is updated in place.  The program exits with an error message when a
  region with the same chromosome, direction and start is already stored.
  """
  strandRegions = regions.setdefault(chromosome, {}).setdefault(direction, {})
  if start in strandRegions:
    sys.exit("Error! Two regions start at position %d in %s on strand %d" % (start, chromosome, direction))
  strandRegions[start] = {end: tags}


def mergeRegion(regions, chromosome, direction, start, end, tags):
  """
  Merge a region read from the second input file into regions.

  regions has the same layout as in addRegion and is updated in place.
  - Unknown region: it is stored with its tags.
  - Region already stored with the same start and end: tags is merged into
    the stored tags with update() (so tags is expected to be dict-like).
  - Region already stored with the same start but another end: the program
    exits with an error message, since the two windows are not consistent.
  """
  strandRegions = regions.setdefault(chromosome, {}).setdefault(direction, {})
  ends = strandRegions.setdefault(start, {})
  if end in ends:
    ends[end].update(tags)
    return
  if ends:
    # A start position holds at most one end, so the only key is the
    # conflicting end already stored.
    knownEnd = next(iter(ends))
    sys.exit("Error! Two regions starting at %d end are not consistent (%d and %d) in %s on strand %d" % (start, end, knownEnd, chromosome, direction))
  ends[end] = tags


if __name__ == "__main__":
  
  # parse command line
  description = "Merge Sliding Windows Clusters: Merge two files containing the results of a sliding windows clustering. [Category: Sliding Windows]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input1",       dest="inputFileName1",    action="store",                      type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--inputFormat1", dest="inputFormat1",      action="store",                      type="string", help="format of the input file 1 [compulsory] [format: transcript file format]")
  parser.add_option("-j", "--input2",       dest="inputFileName2",    action="store",                      type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
  parser.add_option("-g", "--inputFormat2", dest="inputFormat2",      action="store",                      type="string", help="format of the input file 2 [compulsory] [format: transcript file format]")
  parser.add_option("-o", "--output",       dest="outputFileName",    action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
  parser.add_option("-y", "--mysql",        dest="mysql",             action="store_true", default=False,                 help="mySQL output [format: bool] [default: false]")  
  parser.add_option("-v", "--verbosity",    dest="verbosity",         action="store",      default=1,      type="int",    help="trace level [format: int]")
  parser.add_option("-l", "--log",          dest="log",               action="store_true", default=False,                 help="write a log file [format: bool] [default: false]")
  (options, args) = parser.parse_args()

  # refuse to run without the compulsory options, instead of creating files
  # named after None or failing later on
  for destination, flag in (("inputFileName1", "-i"), ("inputFormat1", "-f"), ("inputFileName2", "-j"), ("inputFormat2", "-g"), ("outputFileName", "-o")):
    if getattr(options, destination) is None:
      parser.error("option %s is compulsory" % (flag))

  logHandle = None
  if options.log:
    logHandle = open("%s.log" % options.outputFileName, "w")

  try:
    # create parser
    parser1 = TranscriptContainer(options.inputFileName1, options.inputFormat1, options.verbosity)
    parser2 = TranscriptContainer(options.inputFileName2, options.inputFormat2, options.verbosity)
    # chromosome -> direction -> start -> end -> tags
    outputData = {}

    # read the first file: every start position must be unique
    progress = Progress(parser1.getNbTranscripts(), "Reading file %s" % (options.inputFileName1), options.verbosity)
    for transcript in parser1.getIterator():
      addRegion(outputData, transcript.chromosome, transcript.direction, transcript.start, transcript.end, transcript.tags)
      progress.inc()
    progress.done()

    # read the second file: add the tags to the regions already read
    progress = Progress(parser2.getNbTranscripts(), "Reading file %s" % (options.inputFileName2), options.verbosity)
    for transcript in parser2.getIterator():
      mergeRegion(outputData, transcript.chromosome, transcript.direction, transcript.start, transcript.end, transcript.tags)
      progress.inc()
    progress.done()

    # print the transcripts
    if options.verbosity > 0:
      print("Writing file...")
    writer = Gff3Writer("%s.gff3" % (options.outputFileName), options.verbosity)
    if options.mysql:
      mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
    cpt = 1
    # sorted keys make the numbering of the regions reproducible
    for chromosome in sorted(outputData):
      for direction in sorted(outputData[chromosome]):
        for start in sorted(outputData[chromosome][direction]):
          for end in sorted(outputData[chromosome][direction][start]):
            transcript = Transcript()
            transcript.setChromosome(chromosome)
            transcript.setStart(start)
            transcript.setEnd(end)
            transcript.setDirection(direction)
            transcript.tags = outputData[chromosome][direction][start][end]
            transcript.setName("region_%d" % (cpt))
            cpt += 1
            writer.addTranscript(transcript)
            if options.mysql:
              mysqlWriter.addTranscript(transcript)
    writer.write()
    if options.mysql:
      mysqlWriter.write()
    if options.verbosity > 0:
      print(" ... done")
  finally:
    # the log file is closed whatever happens, including on sys.exit
    if logHandle is not None:
      logHandle.close()
