Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/PrepareBatches.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright INRA (Institut National de la Recherche Agronomique) | |
4 # http://www.inra.fr | |
5 # http://urgi.versailles.inra.fr | |
6 # | |
7 # This software is governed by the CeCILL license under French law and | |
8 # abiding by the rules of distribution of free software. You can use, | |
9 # modify and/ or redistribute the software under the terms of the CeCILL | |
10 # license as circulated by CEA, CNRS and INRIA at the following URL | |
11 # "http://www.cecill.info". | |
12 # | |
13 # As a counterpart to the access to the source code and rights to copy, | |
14 # modify and redistribute granted by the license, users are provided only | |
15 # with a limited warranty and the software's author, the holder of the | |
16 # economic rights, and the successive licensors have only limited | |
17 # liability. | |
18 # | |
19 # In this respect, the user's attention is drawn to the risks associated | |
20 # with loading, using, modifying and/or developing or reproducing the | |
21 # software by the user in light of its specific status of free software, | |
22 # that may mean that it is complicated to manipulate, and that also | |
23 # therefore means that it is reserved for developers and experienced | |
24 # professionals having in-depth computer knowledge. Users are therefore | |
25 # encouraged to load and test the software's suitability as regards their | |
26 # requirements in conditions enabling the security of their systems and/or | |
27 # data to be ensured and, more generally, to use and operate it in the | |
28 # same conditions as regards security. | |
29 # | |
30 # The fact that you are presently reading this means that you have had | |
31 # knowledge of the CeCILL license and that you accept its terms. | |
32 | |
33 import os | |
34 import sys | |
35 from ConfigParser import NoSectionError, NoOptionError | |
36 from commons.core.checker.CheckerUtils import CheckerUtils | |
37 from commons.core.checker.CheckerException import CheckerException | |
38 from commons.core.utils.FileUtils import FileUtils | |
39 from commons.core.seq.FastaUtils import FastaUtils | |
40 | |
class PrepareBatches(object):
    """First pipeline step ("step 1"): build the ``<projectName>_db`` directory.

    The step links the genome FASTA file into a fresh ``<projectName>_db``
    directory, validates its headers, then hands it to ``FastaUtils`` to be
    chunked and split into batches according to the ``prepare_batches``
    section of the configuration.
    """

    # Options that must be present in the configuration section checked by
    # _checkConfig(); the order is the order in which they are verified.
    _REQUIRED_OPTIONS = (
        "chunk_length",
        "chunk_overlap",
        "nb_seq_per_batch",
        "resources",
        "tmpDir",
    )

    def __init__(self, pipelineName, projectDir, projectName, iConfig, verbose):
        """Store the step parameters.

        pipelineName -- name of the calling pipeline (stored, not used here)
        projectDir   -- project directory (stored, not used here)
        projectName  -- prefix of the genome file ("<projectName>.fa") and of
                        the directory to create ("<projectName>_db")
        iConfig      -- configuration object exposing get(section, option)
                        (presumably a ConfigParser instance -- confirm with caller)
        verbose      -- verbosity level; progress is printed when > 0
        """
        self._pipelineName = pipelineName
        self._projectDir = projectDir
        self._projectName = projectName
        self._iConfig = iConfig
        self._verbose = verbose

    def run(self):
        """Create "<projectName>_db" and fill it with the chunk/batch files.

        Exits the process with status 1 when the target directory already
        exists, when the configuration is incomplete, or when the genome
        file has invalid headers. Must be launched from the directory that
        contains "<projectName>.fa", since the symlink target is relative.
        """
        if self._verbose > 0:
            print("beginning of step 1")
            sys.stdout.flush()

        dbDirName = "%s_db" % self._projectName
        if FileUtils.isRessourceExists(dbDirName):
            print("ERROR: directory '%s_db' already exists" % self._projectName)
            sys.exit(1)

        # Validate and parse the whole configuration BEFORE touching the
        # file system: a bad configuration must not leave a half-built
        # "<projectName>_db" directory that would block the next run.
        sectionName = "prepare_batches"
        self._checkConfig(sectionName)
        doClean = self._iConfig.get(sectionName, "clean") == "yes"
        chunkLength = int(self._iConfig.get(sectionName, "chunk_length"))
        chunkOverlap = int(self._iConfig.get(sectionName, "chunk_overlap"))
        nbSeq = int(self._iConfig.get(sectionName, "nb_seq_per_batch"))

        os.mkdir(dbDirName)
        os.chdir(dbDirName)
        try:
            genomeFastaFileName = "%s.fa" % self._projectName
            os.symlink("../%s" % genomeFastaFileName, genomeFastaFileName)
            self._checkGenomeHeaders(genomeFastaFileName)

            chunkFilePrefix = "%s_chunks" % self._projectName
            FastaUtils.dbChunks(genomeFastaFileName, chunkLength, chunkOverlap, 0, chunkFilePrefix, doClean, self._verbose)
            FastaUtils.splitFastaFileInBatches("%s.fa" % chunkFilePrefix, nbSeq * chunkLength)

            if doClean:
                FileUtils.removeFilesByPattern("%s.fa*" % self._projectName)
        finally:
            # Always go back to the launch directory, even if a step failed.
            os.chdir("..")

        if self._verbose > 0:
            print("step 1 finished successfully")
            sys.stdout.flush()

    def _checkGenomeHeaders(self, genomeFastaFileName):
        """Exit with status 1 if CheckerUtils rejects the headers of the file."""
        # 'with' guarantees the handle is closed on every path, including
        # the sys.exit() below and unexpected exceptions.
        with open(genomeFastaFileName, "r") as inGenomeFileHandler:
            try:
                CheckerUtils.checkHeaders(inGenomeFileHandler)
            except CheckerException as e:
                print("Error in file %s. Wrong headers are :" % genomeFastaFileName)
                print("\n".join(e.messages))
                print("Authorized characters are : a-z A-Z 0-9 - . : _\n")
                sys.exit(1)

    def _checkConfig(self, sectionName):
        """Exit with status 1 unless the section and its required options exist.

        sectionName -- configuration section that must hold every option
                       listed in _REQUIRED_OPTIONS
        """
        try:
            CheckerUtils.checkSectionInConfigFile(self._iConfig, sectionName)
        except NoSectionError:
            print("ERROR: the section %s must be in your configuration file" % sectionName)
            sys.exit(1)

        for optionName in self._REQUIRED_OPTIONS:
            try:
                CheckerUtils.checkOptionInSectionInConfigFile(self._iConfig, sectionName, optionName)
            except NoOptionError:
                print("ERROR: the option '%s' must be defined in %s in your configuration file" % (optionName, sectionName))
                sys.exit(1)