comparison commons/tools/PrepareBatches.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 # Copyright INRA (Institut National de la Recherche Agronomique)
4 # http://www.inra.fr
5 # http://urgi.versailles.inra.fr
6 #
7 # This software is governed by the CeCILL license under French law and
8 # abiding by the rules of distribution of free software. You can use,
9 # modify and/ or redistribute the software under the terms of the CeCILL
10 # license as circulated by CEA, CNRS and INRIA at the following URL
11 # "http://www.cecill.info".
12 #
13 # As a counterpart to the access to the source code and rights to copy,
14 # modify and redistribute granted by the license, users are provided only
15 # with a limited warranty and the software's author, the holder of the
16 # economic rights, and the successive licensors have only limited
17 # liability.
18 #
19 # In this respect, the user's attention is drawn to the risks associated
20 # with loading, using, modifying and/or developing or reproducing the
21 # software by the user in light of its specific status of free software,
22 # that may mean that it is complicated to manipulate, and that also
23 # therefore means that it is reserved for developers and experienced
24 # professionals having in-depth computer knowledge. Users are therefore
25 # encouraged to load and test the software's suitability as regards their
26 # requirements in conditions enabling the security of their systems and/or
27 # data to be ensured and, more generally, to use and operate it in the
28 # same conditions as regards security.
29 #
30 # The fact that you are presently reading this means that you have had
31 # knowledge of the CeCILL license and that you accept its terms.
32
33 import os
34 import sys
35 from ConfigParser import NoSectionError, NoOptionError
36 from commons.core.checker.CheckerUtils import CheckerUtils
37 from commons.core.checker.CheckerException import CheckerException
38 from commons.core.utils.FileUtils import FileUtils
39 from commons.core.seq.FastaUtils import FastaUtils
40
class PrepareBatches(object):
    """Step 1 of the pipeline: cut the project genome into chunks and batches.

    The step works inside a fresh "<projectName>_db" directory created in the
    current working directory. It links the genome file "../<projectName>.fa"
    there, validates its headers with CheckerUtils.checkHeaders, then hands the
    file to FastaUtils.dbChunks and FastaUtils.splitFastaFileInBatches.

    All settings are read from the "prepare_batches" section of the
    configuration object given to the constructor.
    """

    # Options that must be present in the configuration section, reported in
    # this order. "resources" and "tmpDir" are not read by this class but were
    # already mandatory; "clean" is read by run() and is therefore validated
    # here too, so that a missing value gives a clear message and not a
    # NoOptionError traceback.
    _REQUIRED_OPTIONS = (
        "chunk_length",
        "chunk_overlap",
        "nb_seq_per_batch",
        "resources",
        "tmpDir",
        "clean",
    )

    def __init__(self, pipelineName, projectDir, projectName, iConfig, verbose):
        """Store the settings of the step; no I/O is done here.

        pipelineName -- name of the calling pipeline (stored, not used here)
        projectDir   -- project directory (stored, not used here)
        projectName  -- prefix of the genome file and of every produced file
        iConfig      -- configuration object exposing get(section, option)
        verbose      -- verbosity level; progress is printed when > 0
        """
        self._pipelineName = pipelineName
        self._projectDir = projectDir
        self._projectName = projectName
        self._iConfig = iConfig
        self._verbose = verbose

    def run(self):
        """Run the step from the directory that contains "<projectName>.fa".

        Exits the process with status 1 if "<projectName>_db" already exists,
        if the configuration is incomplete, or if the genome headers are
        rejected. The working directory is restored before returning or
        aborting.
        """
        # NOTE: single-argument print(...) behaves identically as a Python 2
        # statement and as a Python 3 function call.
        if self._verbose > 0:
            print("beginning of step 1")
            sys.stdout.flush()

        dbDirName = "%s_db" % self._projectName
        if FileUtils.isRessourceExists(dbDirName):
            print("ERROR: directory '%s_db' already exists" % self._projectName)
            sys.exit(1)

        # Validate and read the whole configuration BEFORE touching the file
        # system: aborting after os.mkdir() would leave a stale directory that
        # makes the next run fail with "already exists".
        sectionName = "prepare_batches"
        self._checkConfig(sectionName)
        doClean = self._iConfig.get(sectionName, "clean") == "yes"
        chunkLength = int(self._iConfig.get(sectionName, "chunk_length"))
        chunkOverlap = int(self._iConfig.get(sectionName, "chunk_overlap"))
        nbSeq = int(self._iConfig.get(sectionName, "nb_seq_per_batch"))

        genomeFastaFileName = "%s.fa" % self._projectName
        chunkFilePrefix = "%s_chunks" % self._projectName

        os.mkdir(dbDirName)
        os.chdir(dbDirName)
        try:
            # The genome stays in the parent directory; only a link is created.
            os.symlink("../%s" % genomeFastaFileName, genomeFastaFileName)
            self._checkGenomeHeaders(genomeFastaFileName)

            # NOTE(review): the meaning of the literal 0 (4th argument) is
            # defined by FastaUtils.dbChunks, not visible here -- confirm.
            FastaUtils.dbChunks(genomeFastaFileName, chunkLength, chunkOverlap, 0,
                                chunkFilePrefix, doClean, self._verbose)

            # NOTE(review): nbSeq * chunkLength presumably is the maximum
            # cumulated length of a batch -- confirm against FastaUtils.
            FastaUtils.splitFastaFileInBatches("%s.fa" % chunkFilePrefix, nbSeq * chunkLength)

            if doClean:
                FileUtils.removeFilesByPattern("%s.fa*" % self._projectName)
        finally:
            # Always go back, even when the step aborts half-way.
            os.chdir("..")

        if self._verbose > 0:
            print("step 1 finished successfully")
            sys.stdout.flush()

    def _checkGenomeHeaders(self, genomeFastaFileName):
        """Exit with status 1, listing the offenders, if headers are rejected.

        The headers are checked by CheckerUtils.checkHeaders, which signals a
        problem by raising CheckerException.
        """
        # "with" closes the handle on every path, including sys.exit().
        with open(genomeFastaFileName, "r") as inGenomeFileHandler:
            try:
                CheckerUtils.checkHeaders(inGenomeFileHandler)
            except CheckerException as e:
                print("Error in file %s. Wrong headers are :" % genomeFastaFileName)
                print("\n".join(e.messages))
                print("Authorized characters are : a-z A-Z 0-9 - . : _\n")
                sys.exit(1)

    def _checkConfig(self, sectionName):
        """Exit with status 1 if the section or a required option is missing.

        sectionName -- name of the configuration section to validate

        Only the first missing item is reported.
        """
        try:
            CheckerUtils.checkSectionInConfigFile(self._iConfig, sectionName)
        except NoSectionError:
            print("ERROR: the section %s must be in your configuration file" % sectionName)
            sys.exit(1)

        for optionName in self._REQUIRED_OPTIONS:
            try:
                CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, optionName)
            except NoOptionError:
                print("ERROR: the option '%s' must be defined in %s in your configuration file"
                      % (optionName, sectionName))
                sys.exit(1)