comparison SMART/Java/Python/ncList/ConvertToNCList.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 #! /usr/bin/env python
2 #
3 # Copyright INRA-URGI 2009-2012
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30 #
31
32 import random, os, time, shutil
33 from optparse import OptionParser
34 from commons.core.parsing.ParserChooser import ParserChooser
35 from SMART.Java.Python.structure.Transcript import Transcript
36 from SMART.Java.Python.structure.Interval import Interval
37 from SMART.Java.Python.ncList.NCList import NCList
38 from SMART.Java.Python.ncList.NCListCursor import NCListCursor
39 from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
40 from SMART.Java.Python.ncList.FileSorter import FileSorter
41 from SMART.Java.Python.ncList.NCListMerger import NCListMerger
42 from SMART.Java.Python.misc.Progress import Progress
43 from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
44 try:
45 import cPickle as pickle
46 except:
47 import pickle
48
class ConvertToNCList(object):
    """Convert a mapping or transcript file into an NC-list file.

    Typical use: set the input file (with its format) and the output file,
    optionally request an index or declare the input as already sorted,
    then call run().  run() sorts/splits the input with a FileSorter,
    builds one NCList per chromosome, merges them into the output file with
    an NCListMerger and finally removes the temporary working directory.
    """

    def __init__(self, verbosity = 1):
        """Create a converter.

        verbosity -- trace level; messages of this class are printed when it
                     is greater than 2 (per-chromosome messages when greater
                     than 3).
        """
        self._parsers = {}                # kept for compatibility; not used by this class
        self._parser = None               # set by setInputFileName()
        self._sortedFileNames = {}        # replaced by a path prefix in setOutputFileName()
        self._inputFileName = None
        self._outputFileName = None
        self._directory = None            # temporary directory, set by setOutputFileName()
        self._index = False
        self._ncLists = {}                # chromosome -> NCList
        self._splittedFileNames = {}      # chromosome -> sorted file name
        self._nbElements = 0
        self._nbElementsPerChromosome = {}
        # Suffix used to make the temporary directory name (hopefully) unique.
        self._randomNumber = random.randint(0, 10000)
        self._sorted = False
        self._verbosity = verbosity

    def setInputFileName(self, fileName, format):
        """Register the input file and obtain a parser for the given format."""
        self._inputFileName = fileName
        chooser = ParserChooser(self._verbosity)
        chooser.findFormat(format)
        self._parser = chooser.getParser(fileName)

    def setOutputFileName(self, fileName):
        """Register the output file and create the temporary directory.

        The directory is named "<output without extension>_<random>_files"
        and is created next to the output file if it does not exist yet.
        """
        self._outputFileName = fileName
        fileNameNoExtension = os.path.splitext(fileName)[0]
        baseName = "%s_%d" % (fileNameNoExtension, self._randomNumber)
        self._directory = "%s_files" % (baseName)
        if not os.path.exists(self._directory):
            os.makedirs(self._directory)
        # Only the last path component is appended: baseName may carry a
        # directory part (or be absolute, which would make os.path.join drop
        # the temporary directory altogether), and the sorted files must live
        # inside the temporary directory so that cleanFiles() removes them.
        self._sortedFileNames = os.path.join(self._directory, os.path.basename(baseName))

    def setIndex(self, boolean):
        """Choose whether an index is created along with the NC-lists."""
        self._index = boolean

    def setSorted(self, boolean):
        """Declare whether the input file is already sorted."""
        self._sorted = boolean

    def sortFile(self):
        """Sort (or rewrite, if already sorted) the input, one file per chromosome.

        Stores the resulting file names and element counts reported by the
        FileSorter.
        """
        if self._verbosity > 2:
            print("%s file %s..." % ("Rewriting" if self._sorted else "Sorting", self._inputFileName))
        startTime = time.time()
        fs = FileSorter(self._parser, self._verbosity-4)
        fs.setPresorted(self._sorted)
        fs.perChromosome(True)
        fs.setOutputFileName(self._sortedFileNames)
        fs.sort()
        self._splittedFileNames = fs.getOutputFileNames()
        self._nbElementsPerChromosome = fs.getNbElementsPerChromosome()
        self._nbElements = fs.getNbElements()
        endTime = time.time()
        if self._verbosity > 2:
            print(" ...done (%ds)" % (endTime - startTime))

    def createNCLists(self):
        """Build one NCList per chromosome from the files produced by sortFile()."""
        self._ncLists = {}
        if self._verbosity > 2:
            print("Creating NC-list for %s..." % (self._inputFileName))
        startTime = time.time()
        # items() rather than iteritems(): works on both Python 2 and 3.
        for chromosome, fileName in self._splittedFileNames.items():
            if self._verbosity > 3:
                print(" chromosome %s" % (chromosome))
            ncList = NCList(self._verbosity)
            if self._index:
                ncList.createIndex(True)
            ncList.setChromosome(chromosome)
            ncList.setFileName(fileName)
            ncList.setNbElements(self._nbElementsPerChromosome[chromosome])
            ncList.buildLists()
            self._ncLists[chromosome] = ncList
        endTime = time.time()
        if self._verbosity > 2:
            print(" ...done (%ds)" % (endTime - startTime))

    def writeOutputFile(self):
        """Merge the per-chromosome NC-lists into the output file."""
        merger = NCListMerger(self._verbosity)
        merger.setFileName(self._outputFileName)
        merger.addIndex(self._index)
        merger.setNCLists(self._ncLists)
        merger.merge()

    def cleanFiles(self):
        """Remove the temporary directory; no-op if none was ever set up."""
        if self._directory is None:
            return
        shutil.rmtree(self._directory)

    def run(self):
        """Run the whole conversion; temporary files are removed even on failure."""
        try:
            self.sortFile()
            self.createNCLists()
            self.writeOutputFile()
        finally:
            self.cleanFiles()

    def getSortedFileNames(self):
        """Return the chromosome -> sorted file name mapping from sortFile()."""
        return self._splittedFileNames

    def getNbElements(self):
        """Return the total number of elements reported by the sorter."""
        return self._nbElements

    def getNbElementsPerChromosome(self):
        """Return the per-chromosome element counts reported by the sorter."""
        return self._nbElementsPerChromosome

    def getNCLists(self):
        """Return the chromosome -> NCList mapping built by createNCLists()."""
        return self._ncLists

    def getTmpDirectory(self):
        """Return the temporary directory (None before setOutputFileName())."""
        return self._directory
153
if __name__ == "__main__":
    # Command-line entry point: parse the options, check that the
    # compulsory ones are present, then run the conversion.
    description = "Convert To NC-List v1.0.0: Convert a mapping or transcript file into a NC-List. [Category: NC-List]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="Query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-d", "--index", dest="index", action="store_true", default=False, help="create an index [default: false] [format: boolean]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="Output file [compulsory] [format: output file in NCList format]")
    parser.add_option("-s", "--sorted", dest="sorted", action="store_true", default=False, help="input file is already sorted [format: boolean] [default: False]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="Trace level [format: int] [default: 1]")
    (options, args) = parser.parse_args()

    # The help text marks these options as compulsory; fail early with a
    # usage message instead of passing None down to the converter.
    compulsoryOptions = (
        ("inputFileName", "-i/--input"),
        ("format", "-f/--format"),
        ("outputFileName", "-o/--output"),
    )
    for optionName, optionFlags in compulsoryOptions:
        if getattr(options, optionName) is None:
            parser.error("option %s is compulsory" % (optionFlags))

    ctncl = ConvertToNCList(options.verbosity)
    ctncl.setInputFileName(options.inputFileName, options.format)
    ctncl.setOutputFileName(options.outputFileName)
    ctncl.setIndex(options.index)
    ctncl.setSorted(options.sorted)
    ctncl.run()
172