#! /usr/bin/env python
"""
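Compute the mean WIG value of each transcript of an input file, and write these means either as a plain table (one line per transcript) or as a distribution / cumulated distribution over a fixed number of points.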
"""

import os
from optparse import OptionParser
from structure.transcriptContainer import *
from parsing.wigParser import *
from misc.progress import *


def computeMean(values):
  """
  Return the arithmetic mean of a sequence of numbers, as a float.
  Return None when the sequence is empty, so that the caller can decide what to do
  (instead of failing on a division by zero).
  @param values: the numbers to average (only needs to support len() and sum())
  @return:       the mean as a float, or None if there is no value
  """
  if len(values) == 0:
    return None
  # force float division: under Python 2, int / int would truncate
  return float(sum(values)) / len(values)


def getBinIndex(value, minValue, maxValue, nbPoints):
  """
  Return the index of the bin which contains a value, when [minValue, maxValue] is
  divided into nbPoints steps (i.e. nbPoints + 1 output points, indexed 0..nbPoints).
  Values outside of [minValue, maxValue] are clamped to the first or the last bin.
  @param value:    the value to locate
  @param minValue: lower bound of the range
  @param maxValue: upper bound of the range (must be greater than minValue)
  @param nbPoints: number of steps in the range (must be positive)
  @return:         an int in [0, nbPoints]
  """
  # multiply before dividing: when the range has width 1 (the default), this gives
  # exactly the same result as the historical "(value - minValue) * nbPoints"
  index = int((value - minValue) * nbPoints / float(maxValue - minValue))
  return max(0, min(nbPoints, index))


def computeDistribution(values, minValue, maxValue, nbPoints, cumulated = False):
  """
  Count how many values fall into each bin of [minValue, maxValue].
  @param values:    the values to count (any iterable of numbers, need not be sorted)
  @param minValue:  lower bound of the range
  @param maxValue:  upper bound of the range
  @param nbPoints:  number of steps in the range
  @param cumulated: if True, element i is the number of values in bins 0..i
  @return:          a list of nbPoints + 1 counts
  @raise ValueError: if nbPoints is not positive, or if maxValue <= minValue
  """
  if nbPoints <= 0:
    raise ValueError("The number of points should be positive (got %s)" % (nbPoints))
  if maxValue <= minValue:
    raise ValueError("The maximum value (%s) should be greater than the minimum value (%s)" % (maxValue, minValue))
  counts = [0] * (nbPoints + 1)
  for value in values:
    counts[getBinIndex(value, minValue, maxValue, nbPoints)] += 1
  if cumulated:
    # turn the per-bin counts into a running sum
    total = 0
    for i, count in enumerate(counts):
      total    += count
      counts[i] = total
  return counts


if __name__ == "__main__":
  
  # parse command line
  description = "Get WIG Data: Cluster the data contained in a WIG file (thus covering a large proportion of the genome) into regions given by a set of genomic coordinates. [Category: Personnal]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input",        dest="inputFileName",     action="store",                        type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--inputFormat",  dest="inputFormat",       action="store",                        type="string", help="format of the input file [compulsory] [format: transcript file format]")
  parser.add_option("-w", "--wig",          dest="wig",               action="store",                        type="string", help="wig directory [compulsory] [format: file in WIG format]")  
  parser.add_option("-o", "--output",       dest="outputFileName",    action="store",                        type="string", help="output file [compulsory] [format: output file in GFF3 format]")
  parser.add_option("-s", "--shape",        dest="shape",             action="store",                        type="string", help="shape of the output: plain, dis (distribution), cum (cumulated distribution) [format: choice (plain, dis, cum)]")
  parser.add_option("-p", "--points",       dest="points",            action="store",                        type="int",    help="number of points [format: int]")
  # the bounds are documented as floats: parse them as such (integer input is still accepted)
  parser.add_option("-m", "--minValue",     dest="minValue",          action="store",      default=0,        type="float",  help="minimum value [default: 0] [format: float] [default: 0]")
  parser.add_option("-M", "--maxValue",     dest="maxValue",          action="store",      default=1,        type="float",  help="maximum value [default: 1] [format: float] [default: 1]")  
  parser.add_option("-v", "--verbosity",    dest="verbosity",         action="store",      default=1,        type="int",    help="trace level [format: int]")
  parser.add_option("-l", "--log",          dest="log",               action="store_true", default=False,                   help="write a log file [format: bool] [default: false]")
  (options, args) = parser.parse_args()

  # fail early, with a usage message, rather than deep inside the parsers
  for dest, flag in (("inputFileName", "-i"), ("inputFormat", "-f"), ("wig", "-w"), ("outputFileName", "-o")):
    if getattr(options, dest) is None:
      parser.error("option %s is compulsory" % (flag))

  # as before, any shape which is neither 'plain' nor 'cum' is treated as 'dis'
  plainShape = options.shape is None or options.shape == "plain"
  if not plainShape:
    if options.points is None or options.points <= 0:
      parser.error("option -p should be a positive integer when the shape is not 'plain'")
    if options.maxValue <= options.minValue:
      parser.error("option -M should be greater than option -m")

  logHandle = None
  if options.log:
    logHandle = open("%s.log" % options.outputFileName, "w")

  try:
    # create parsers
    transcriptParser = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
    wigParser        = WigParser(options.wig)

    # mean WIG value of each transcript, indexed by transcript name
    data = {}
    progress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
    for transcript in transcriptParser.getIterator():
      # extractWigData is assumed to return a sequence of numbers (it is given to sum() and len())
      mean = computeMean(transcript.extractWigData(wigParser))
      if mean is None:
        # no WIG value for this transcript: skip it instead of dividing by zero
        if logHandle is not None:
          logHandle.write("No WIG data for transcript %s: skipped\n" % (transcript.name))
      else:
        data[transcript.name] = mean
      progress.inc()
    progress.done()

    if plainShape:
      with open(options.outputFileName, "w") as outputFile:
        for name in data:
          outputFile.write("%s\t%f\n" % (name, data[name]))
    else:
      step       = (options.maxValue - options.minValue) / float(options.points)
      outputData = computeDistribution(data.values(), options.minValue, options.maxValue, options.points, options.shape == "cum")
      with open(options.outputFileName, "w") as outputFile:
        for i in range(0, options.points + 1):
          outputFile.write("%f\t%f\n" % (i * step + options.minValue, outputData[i]))
  finally:
    if logHandle is not None:
      logHandle.close()

