#! /usr/bin/env python
"""Get the error distribution from a mapping"""

import os
from optparse import OptionParser
from transcriptContainer import *
from progress import *
from rPlotter import *


def parseError5(comment):
  """Return the integer that starts the first interval of a transcript comment.

  The comment is split on ";"; the fourth field is split on "," and the text
  before the first "-" of its first element is converted to an int
  (presumably the field lists "start-end" intervals -- confirm against the
  code that writes these comments).

  Raises IndexError when the comment has fewer than four ";"-separated
  fields, and ValueError when the extracted text is not an integer.
  """
  fields    = comment.split(";")
  intervals = fields[3].split(",")
  positions = intervals[0].split("-")
  return int(positions[0])


def formatCsvLines(errors5, names):
  """Return the lines of the .csv report for a distribution.

  errors5 maps each observed value to its number of occurrences, and names
  maps each observed value to the list of transcript names showing it.
  One "value;count;name1,name2,...\\n" line is produced for every integer
  between the smallest and the largest observed value; values that were never
  observed get a zero count and an empty name list.  An empty distribution
  yields an empty list instead of failing on min()/max().
  """
  if not errors5:
    return []
  lines = []
  for error5 in range(min(errors5), max(errors5) + 1):
    if error5 in errors5:
      lines.append("%d;%d;%s\n" % (error5, errors5[error5], ",".join(names[error5])))
    else:
      lines.append("%d;0;\n" % (error5))
  return lines


if __name__ == "__main__":

  # parse command line
  optionParser = OptionParser()
  optionParser.add_option("-i", "--input",     dest="inputFileName",  action="store",                        type="string", help="input file")
  optionParser.add_option("-f", "--format",    dest="format",         action="store",                        type="string", help="format of file")
  optionParser.add_option("-o", "--output",    dest="outputFileName", action="store",                        type="string", help="output .png file")
  optionParser.add_option("-c", "--csv",       dest="csv",            action="store_true", default=False,                   help="write a .csv file")
  optionParser.add_option("-x", "--xMax",      dest="xMax",           action="store",      default=None,     type="int",    help="maximum value on the x-axis to plot")
  optionParser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,        type="int",    help="trace level")
  optionParser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                   help="write a log file")
  (options, args) = optionParser.parse_args()

  # fail early with a usage message: without -o the outputs would silently be
  # named "None.png" / "None.csv"
  if options.inputFileName is None:
    optionParser.error("an input file is required (-i)")
  if options.outputFileName is None:
    optionParser.error("an output file name is required (-o)")

  # the container gets its own name instead of re-using the option parser's
  container = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
  nbLines   = container.getNbTranscripts()
  # single-argument print calls are parenthesised so that they behave the
  # same under Python 2 and also parse under Python 3
  print("%i lines found" % (nbLines))

  # treat mappings
  errors5    = dict()   # observed value -> number of transcripts showing it
  names      = dict()   # observed value -> names of these transcripts
  sum5       = 0
  nbMappings = 0
  progress   = Progress(nbLines, "Analyzing mappings of " + options.inputFileName, options.verbosity)
  for transcript in container.getIterator():
    error5 = parseError5(transcript.comment)

    if error5 not in errors5:
      errors5[error5] = 1
      names[error5]   = [transcript.name]
    else:
      errors5[error5] += 1
      names[error5].append(transcript.name)
    sum5 += error5

    nbMappings += 1
    progress.inc()
  progress.done()


  # plot sequences
  plotter = RPlotter("%s.png" % (options.outputFileName), options.verbosity)
  plotter.setFill(0)
  plotter.setMaximumX(options.xMax)
  plotter.addLine(errors5)
  plotter.plot()

  if options.csv:
    csvHandle = open("%s.csv" % (options.outputFileName), "w")
    # close the file even if writing fails
    try:
      csvHandle.writelines(formatCsvLines(errors5, names))
    finally:
      csvHandle.close()

  print("nb mappings: %d" % (nbMappings))
  # the statistics are undefined (and the average divides by zero) when
  # nothing was read; minimum and maximum come from the observed values
  # rather than from sentinel seeds
  if nbMappings > 0:
    print("min/avg/max sizes: %d/%.2f/%d" % (min(errors5), float(sum5) / nbMappings, max(errors5)))