#! /usr/bin/env python
"""Find random regions in a genome"""

import bisect
import random
from optparse import OptionParser

from parsing.fastaParser import *
from writer.gff3Writer import *
from writer.mySqlTranscriptWriter import *
from misc.progress import *


def selectEligibleRegions(regionSizes, size):
  """Select the regions that can hold a random window and cumulate their sizes.

  A start position is later drawn with random.randint(1, length - size), which
  raises ValueError unless length - size >= 1; shorter regions are therefore
  left out of the draw instead of crashing the script.

  Args:
    regionSizes: iterable of (name, length) pairs, in drawing order.
    size:        requested size of the random windows.

  Returns:
    A (names, cumulatedSizes) pair of parallel lists: the names of the kept
    regions and the running total of their lengths.
  """
  names          = []
  cumulatedSizes = []
  cumulatedSize  = 0
  for name, length in regionSizes:
    if length - size < 1:
      continue
    cumulatedSize += length
    names.append(name)
    cumulatedSizes.append(cumulatedSize)
  return names, cumulatedSizes


def findRegion(names, cumulatedSizes, position):
  """Return the region whose cumulated interval contains a position.

  Args:
    names:          region names, as returned by selectEligibleRegions.
    cumulatedSizes: running totals of the region lengths, parallel to names.
    position:       integer in [0, cumulatedSizes[-1] - 1].

  Returns:
    The first name whose cumulated size is strictly greater than position,
    so that each region is picked proportionally to its length.
  """
  return names[bisect.bisect_right(cumulatedSizes, position)]


def main():
  """Parse the command line and write random regions of a genome.

  Regions are written through a Gff3Writer to "<output>.gff3" and, when
  --mysql is given, also handed to a MySqlTranscriptWriter.  Exits through
  parser.error() (usage message, status 2) when a compulsory option is
  missing, when the size is not positive, or when no sequence is long enough.
  """
  # parse command line
  # NOTE: the help strings are kept unchanged on purpose; their bracketed tags
  # look machine-readable and may be parsed by other tools.
  description = "Get Random Regions: Get some random coordinates on a genome. [Category: Other]"

  parser = OptionParser(description = description)
  parser.add_option("-r", "--reference",       dest="reference",      action="store",                     type="string", help="file that contains the sequences [compulsory] [format: file in FASTA format]")
  parser.add_option("-o", "--output",          dest="outputFileName", action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in FASTA format]")
  parser.add_option("-s", "--size",            dest="size",           action="store",      default=None,  type="int",    help="size of the regions [compulsory] [format: int]")
  parser.add_option("-n", "--number",          dest="number",         action="store",      default=None,  type="int",    help="number of regions [compulsory] [format: int]")
  parser.add_option("-y", "--mysql",           dest="mysql",          action="store_true", default=False,                help="mySQL output [format: bool] [default: false]")
  parser.add_option("-v", "--verbosity",       dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
  parser.add_option("-l", "--log",             dest="log",            action="store",      default=None,  type="string", help="write a log file [format: bool]")
  (options, args) = parser.parse_args()

  # fail early with a usage message rather than with a TypeError (or a file
  # called "None.gff3") further down
  compulsoryOptions = (("reference", "-r/--reference"), ("outputFileName", "-o/--output"), ("size", "-s/--size"), ("number", "-n/--number"))
  for dest, flag in compulsoryOptions:
    if getattr(options, dest) is None:
      parser.error("option %s is compulsory" % (flag))
  if options.size < 1:
    parser.error("option -s/--size should be a positive integer")

  logHandle = None
  if options.log is not None:
    logHandle = open(options.log, "w")

  try:
    sequenceParser = FastaParser(options.reference, options.verbosity)
    # query each region size once instead of once per random draw
    regionSizes    = [(chromosome, sequenceParser.getSizeOfRegion(chromosome)) for chromosome in sequenceParser.getRegions()]
    sizes          = dict(regionSizes)
    chromosomes, cumulatedSizes = selectEligibleRegions(regionSizes, options.size)
    if options.number > 0 and not chromosomes:
      parser.error("no sequence of %s is longer than the requested size (%d)" % (options.reference, options.size))

    writer = Gff3Writer("%s.gff3" % (options.outputFileName), options.verbosity)
    if options.mysql:
      sqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)

    random.seed()
    # The file named by --output is created (and truncated) but this script
    # never writes to it: the regions go to the GFF3 writer.  It is still
    # created so that the files produced by the script do not change.
    with open(options.outputFileName, "w"):
      progress = Progress(options.number, "Writing to %s" % (options.outputFileName), options.verbosity)
      for i in range(options.number):
        # randrange excludes its upper bound, so every draw falls inside one
        # of the cumulated intervals (randint(0, total) could return total,
        # which silently over-weighted the last sequence)
        position   = random.randrange(cumulatedSizes[-1])
        chromosome = findRegion(chromosomes, cumulatedSizes, position)
        # NOTE(review): the upper bound is kept as in the original; if
        # setSize() makes the end equal to start + size - 1, the very last
        # possible start (length - size + 1) is never drawn -- confirm.
        start      = random.randint(1, sizes[chromosome] - options.size)
        transcript = Transcript()
        transcript.setChromosome(chromosome)
        transcript.setStart(start)
        transcript.setSize(options.size)
        transcript.setName("rand_%d" % (i+1))
        transcript.setDirection("+")
        writer.addTranscript(transcript)
        if options.mysql:
          sqlWriter.addTranscript(transcript)
        progress.inc()
      progress.done()

    if options.mysql:
      sqlWriter.write()
  finally:
    # close the log file even when something above failed
    if logHandle is not None:
      logHandle.close()


if __name__ == "__main__":
  main()
