diff smart_toolShed/SMART/Java/Python/runRandomJobs.py @ 0:e0f8dcca02ed

Uploaded S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/SMART/Java/Python/runRandomJobs.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,46 @@
+import unittest
+import os
+import time
+from optparse import OptionParser
+from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample
+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
+
+if __name__ == '__main__':
+    description = "runRandomJobs: create random ref/query files (with size given), and run the jobs on cluster with help of runJobs.sh"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--inputRef", dest="inputRefGff3FileName", action="store", type="string", help="Reference input file [compulsory] [format: file in gff3 format]")
+    parser.add_option("-j", "--inputQuery", dest="inputQueryGff3FileName", action="store", type="string", help="Query input file [compulsory] [format: file in gff3 format]")
+    parser.add_option("-m", "--inputRefSize", dest="numberOfRefReads", action="store", type="int", help="The number of Reference")
+    parser.add_option("-n", "--inputQuerySize", dest="numberOfQReads", action="store", type="int", help="The number of Query")
+    parser.add_option("-o", "--output", dest="outputGff3FileName", action="store", type="string", help="output file [compulsory] [format: output file in gff3 format]")
+    (options, args) = parser.parse_args()
+    
+    outputDataName = 'timeResult.dat' 
+    fTime = open(outputDataName, 'w')  
+    fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')   
+    chromSize = 100000
+    print 'ref size = %d,  query size = %d' %(options.numberOfRefReads, options.numberOfQReads)
+    iMFOR_ref = MockFindOverlaps_randomExample(options.inputRefGff3FileName, 'ref', options.numberOfRefReads, chromSize)
+    iMFOR_ref.write()
+    cmd_ref = 'sort -f -n -k4 -k5.4rn -o %s %s' % (options.inputRefGff3FileName, options.inputRefGff3FileName)
+    os.system(cmd_ref)
+    iMFOR_query = MockFindOverlaps_randomExample(options.inputQueryGff3FileName,'q', options.numberOfQReads, chromSize)
+    iMFOR_query.write()
+    cmd_query = 'sort -f -n -k4 -k5.4rn -o %s %s' % (options.inputQueryGff3FileName, options.inputQueryGff3FileName)
+    os.system(cmd_query)
+    iFOO = FindOverlaps_optim(options.inputRefGff3FileName, options.inputQueryGff3FileName)
+    iFOO.setOutputGff3FileName(options.outputGff3FileName)
+    
+    startTime_optim = time.time()
+    iFOO.run()
+    iFOO.close()  
+    nbOverlap = iFOO.getNbOverlap() 
+    endTime_optim = time.time()    
+    cmd = 'sort -f -n -k4 -k5.4rn -k9.5 -t ";" -o %s %s' % (options.outputGff3FileName, options.outputGff3FileName)
+    os.system(cmd)
+    totalTime_optim = endTime_optim - startTime_optim
+    print 'we take %s second.' % (totalTime_optim)
+    fTime.write('%d\t%d\t%d\t%.2f\n'%(options.numberOfRefReads, options.numberOfQReads, nbOverlap, totalTime_optim))
+    iFOO.deletIntermediateFiles()
+    fTime.close()