# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use,
# modify and/or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and,  more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
import filecmp
import glob
import itertools
import math
import os
import re
import shutil
import sys

# hashlib is optional: when it cannot be imported, the MD5 helper of FileUtils returns ""
try:
    import hashlib
except:
    pass
|
|
42
|
|
43
|
|
class FileUtils(object):

    ## Collection of static helpers to inspect, compare, concatenate,
    # clean up and split text files
    #
    # All methods are static: call them as FileUtils.method(...)
    #

    ## Return the number of lines in the given file
    #
    # A last line made only of an end-of-line character is not counted.
    #
    # @param fileName string name of the file to read
    # @return int number of lines
    #
    @staticmethod
    def getNbLinesInSingleFile(fileName):
        nbLines = 0
        lastLine = None
        # iterate over the handler rather than calling readlines(), so that
        # the whole file is never held in memory
        with open(fileName, "r") as fileHandler:
            for lastLine in fileHandler:
                nbLines += 1
        if lastLine == "\n":
            nbLines -= 1
        return nbLines

    ## Return the number of lines in the files in the given list
    #
    # @param lFileNames list of file names
    # @return int sum of the line counts given by getNbLinesInSingleFile()
    #
    @staticmethod
    def getNbLinesInFileList(lFileNames):
        return sum(FileUtils.getNbLinesInSingleFile(fileName) for fileName in lFileNames)

    ## Return True if the given path exists, False otherwise
    #
    # @param fileName string path to check (os.path.exists() semantics)
    #
    @staticmethod
    def isRessourceExists(fileName):
        return os.path.exists(fileName)

    ## Return True if the given file is empty, False otherwise
    #
    # @note "empty" means that getNbLinesInSingleFile() returns 0, hence a
    # file holding a single end-of-line character is considered empty
    #
    @staticmethod
    def isEmpty(fileName):
        return FileUtils.getNbLinesInSingleFile(fileName) == 0

    ## Return True if both files are identical, False otherwise
    #
    # The contents are compared in-process, so file names holding blanks or
    # shell metacharacters are supported and no temporary file is written.
    #
    # @param file1 string name of the first file
    # @param file2 string name of the second file
    # @return bool False is also returned (with a message on stderr) when one
    # of the files cannot be read
    #
    @staticmethod
    def are2FilesIdentical(file1, file2):
        try:
            # shallow=False: compare the contents, not only the os.stat() signatures
            return filecmp.cmp(file1, file2, shallow=False)
        except (IOError, OSError) as e:
            sys.stderr.write("ERROR: can't compare '%s' and '%s': %s\n" % (file1, file2, e))
            sys.stderr.flush()
            return False

    ## Return a string with all the content of the files in the given list
    #
    # @param lFiles iterable of file names; files are read in sorted name
    # order and the iterable itself is left untouched
    # @return string concatenation of the contents
    #
    @staticmethod
    def getFileContent(lFiles):
        lContents = []
        for fileName in sorted(lFiles):
            with open(fileName, "r") as currentFile:
                lContents.append(currentFile.read())
        return "".join(lContents)

    ## Save content of the given file after having sorted its lines
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite inFile)
    #
    @staticmethod
    def sortFileContent(inFile, outFile=""):
        with open(inFile, "r") as inFileHandler:
            lLines = inFileHandler.readlines()
        lLines.sort()
        if outFile == "":
            outFile = inFile
        with open(outFile, "w") as outFileHandler:
            outFileHandler.writelines(lLines)

    ## Add end-of-line symbol to the given file content if necessary
    #
    # @param fileContent string
    # @return string unchanged if empty or already ending with an end-of-line
    #
    @staticmethod
    def addNewLineAtTheEndOfFileContent(fileContent):
        if fileContent and not fileContent.endswith("\n"):
            fileContent += "\n"
        return fileContent

    ## Concatenate files in the given list
    #
    # @param lFiles list of input file names (left untouched)
    # @param outFile string name of the output file, opened in append mode
    # @param sort bool process the input files in sorted name order
    # @param skipHeaders bool skip the first line of each input file
    # @param separator string written between two consecutive input files
    #
    @staticmethod
    def catFilesFromList(lFiles, outFile, sort=True, skipHeaders=False, separator=""):
        lInFiles = sorted(lFiles) if sort else lFiles
        with open(outFile, "a") as outFileHandler:
            for index, singleFile in enumerate(lInFiles):
                if index > 0:
                    outFileHandler.write(separator)
                with open(singleFile, "r") as singleFileHandler:
                    if skipHeaders:
                        singleFileHandler.readline()
                    shutil.copyfileobj(singleFileHandler, outFileHandler)

    ## Concatenate files according to the given pattern
    #
    # @param pattern string glob pattern selecting the input files
    # @param outFile string name of the output file, opened in append mode
    # @param skipHeaders bool skip the first line of each input file
    # @param separator string written between two consecutive input files
    #
    @staticmethod
    def catFilesByPattern(pattern, outFile, skipHeaders=False, separator=""):
        lFiles = glob.glob(pattern)
        FileUtils.catFilesFromList(lFiles, outFile, skipHeaders=skipHeaders, separator=separator)

    ## Remove files listed according to the given pattern
    #
    # @example prefix="/home/tmp/dummy*.txt"
    #
    @staticmethod
    def removeFilesByPattern(prefix):
        for fileName in glob.glob(prefix):
            os.remove(fileName)

    ## Remove files listed according to the suffixes in the given list
    #
    # @param targetPath string directory to clean, with or without a trailing
    # separator; an empty string means the current directory
    # @param lSuffixes list of suffixes, every file matching "*<suffix>" is removed
    #
    @staticmethod
    def removeFilesBySuffixList(targetPath, lSuffixes):
        for suffix in lSuffixes:
            # os.path.join() copes with an empty path and with a trailing separator
            FileUtils.removeFilesByPattern(os.path.join(targetPath, "*%s" % suffix))

    ## Remove repeated blanks in the given file
    #
    # Each run of consecutive space characters is squeezed into a single one.
    # The file content is read in memory before the output is written.
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite inFile)
    #
    @staticmethod
    def removeRepeatedBlanks(inFile, outFile=""):
        if outFile == "":
            outFile = inFile
        # read everything first: inFile and outFile may be the same file
        with open(inFile, "r") as inFileHandler:
            content = inFileHandler.read()
        with open(outFile, "w") as outFileHandler:
            outFileHandler.write(re.sub(" {2,}", " ", content))

    ## Remove files in the given list
    #
    @staticmethod
    def removeFilesFromList(lFiles):
        for fileName in lFiles:
            os.remove(fileName)

    ## Remove files in the given list if exist
    #
    @staticmethod
    def removeFilesFromListIfExist(lFiles):
        for fileName in lFiles:
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    ## Append the content of a file to another file
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file
    #
    @staticmethod
    def appendFileContent(inFile, outFile):
        with open(outFile, "a") as outFileHandler:
            with open(inFile, "r") as inFileHandler:
                shutil.copyfileobj(inFileHandler, outFileHandler)

    ## Replace Windows end-of-line by Unix end-of-line
    #
    # The converted content is written into "<inFile>.tmp", which then
    # replaces the input file: the original is kept if the conversion fails.
    #
    # @param inFile string name of the file to convert in place
    #
    @staticmethod
    def fromWindowsToUnixEof(inFile):
        tmpFile = "%s.tmp" % (inFile)
        try:
            # binary mode: no end-of-line translation is done behind our back
            with open(inFile, "rb") as inFileHandler:
                with open(tmpFile, "wb") as tmpFileHandler:
                    for line in inFileHandler:
                        tmpFileHandler.write(line.replace(b"\r\n", b"\n"))
            # os.replace() only exists in Python 3; fall back on os.rename()
            getattr(os, "replace", os.rename)(tmpFile, inFile)
        except Exception:
            if os.path.exists(tmpFile):
                os.remove(tmpFile)
            raise

    ## Remove duplicated lines in a file
    #
    # @note it preserves the initial order and handles blank lines
    # @note the file is loaded in memory; every line written back ends with
    # an end-of-line character
    #
    # @param inFile string name of the file to rewrite in place
    #
    @staticmethod
    def removeDuplicatedLines(inFile):
        with open(inFile, "r") as inFileHandler:
            lLines = inFileHandler.read().split("\n")
        # split() yields a trailing empty item when the content ends with "\n"
        # (and a single empty item for an empty file)
        if lLines[-1] == "":
            del lLines[-1]
        sSeenLines = set()
        with open(inFile, "w") as inFileHandler:
            for line in lLines:
                if line not in sSeenLines:
                    sSeenLines.add(line)
                    inFileHandler.write("%s\n" % (line))

    ## Write a list of lines in a given file
    #
    # @param inFile string name of the file to (over)write
    # @param lLines list of strings, written as they are (no end-of-line added)
    #
    @staticmethod
    def writeLineListInFile(inFile, lLines):
        with open(inFile, "w") as inFileHandler:
            inFileHandler.writelines(lLines)

    ## Give the list of path of each directory in the given directory
    #
    # @param rootPath string absolute path of the given directory
    #
    # @return lDirPath list of directory paths (built from rootPath, hence
    # absolute only if rootPath is absolute)
    #
    @staticmethod
    def getAbsoluteDirectoryPathList(rootPath):
        return [ressource for ressource in glob.glob(rootPath + "/*") if os.path.isdir(ressource)]

    ## Get a sublist of which each element matches/doesn't match a pattern
    #
    # @param lPath string list of paths
    #
    # @param pattern string regular expression searched anywhere in each path
    #
    # @param match bool keep the matching paths if True, the other ones if False
    #
    # @return lPathMatching list of path matching pattern
    #
    @staticmethod
    def getSubListAccordingToPattern(lPath, pattern, match=True):
        # search() instead of match(".*%s.*"): a top-level "|" in the pattern
        # then applies to the whole pattern, and the regexp is compiled once
        regexp = re.compile(pattern)
        keepMatching = bool(match)
        return [path for path in lPath if (regexp.search(path) is not None) == keepMatching]

    ## Give the list of file names found in the given directory
    #
    # @param dirPath string absolute path of the given directory
    # @param patternFileFilter string regular expression that the beginning
    # of the file name has to match (default: any name)
    #
    # @return lFilesInDir list of file names (without the directory part)
    #
    @staticmethod
    def getFileNamesList(dirPath, patternFileFilter=".*"):
        regexp = re.compile(patternFileFilter)
        lFilesInDir = []
        for ressource in glob.glob(dirPath + "/*"):
            if os.path.isfile(ressource):
                fileName = os.path.basename(ressource)
                if regexp.match(fileName):
                    lFilesInDir.append(fileName)
        return lFilesInDir

    ## Return the MD5 sum of a file
    #
    # @param inFile string name of the file
    # @return string hexadecimal digest, or "" if hashlib has not been loaded
    #
    @staticmethod
    def getMd5SecureHash(inFile):
        if "hashlib" not in sys.modules:
            return ""
        md5 = hashlib.md5()
        # hash the raw bytes, chunk by chunk: update() refuses text in Python 3
        with open(inFile, "rb") as inFileHandler:
            for chunk in iter(lambda: inFileHandler.read(65536), b""):
                md5.update(chunk)
        return md5.hexdigest()

    ## Cat all files of a given directory
    #
    # @param dir string directory name
    # @param outFileName string output file name
    #
    @staticmethod
    def catFilesOfDir(dir, outFileName):
        lFilePaths = [dir + "/" + fileName for fileName in FileUtils.getFileNamesList(dir)]
        FileUtils.catFilesFromList(lFilePaths, outFileName)

    ## Return True if size file > 0 octet
    #
    # @param fileName string file name
    #
    @staticmethod
    def isSizeNotNull(fileName):
        return os.path.getsize(fileName) > 0

    ## Split one file into N Files by lines
    #
    # Output files are named "<prefix>-<i><ext>" (i from 1 to N) and written
    # in the current directory. N is lowered to the number of lines when it
    # exceeds it; a single file is written when that leaves N equal to 0.
    #
    # @param fileName string file name
    # @param N int number of files to create
    #
    @staticmethod
    def splitFileIntoNFiles(fileName, N):
        nbLine = FileUtils.getNbLinesInSingleFile(fileName)
        if N > nbLine:
            N = nbLine
        if N != 0:
            nbLinesInEachFile = int(math.ceil(float(nbLine) / N))
        else:
            N = 1
            nbLinesInEachFile = nbLine
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName, "r") as fileHandler:
            for i in range(1, N + 1):
                with open("%s-%s%s" % (filePrefix, i, fileExt), "w") as f:
                    # islice() takes at most nbLinesInEachFile lines and simply
                    # stops early once the input is exhausted
                    f.writelines(itertools.islice(fileHandler, nbLinesInEachFile))

    ## Split one file into files of N lines
    #
    # Output files are named "<prefix>-<i><ext>" (i starting at 1) and written
    # in the current directory. When the input is empty or N is not a positive
    # number, the whole input is copied into a single "<prefix>-1<ext>" file.
    #
    # @param fileName string input file name
    # @param N int lines number per files
    #
    @staticmethod
    def splitFileAccordingToLineNumber(fileName, N):
        N = int(N)
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName) as inF:
            # only iteration is used on inF (never readline()), since Python 2
            # forbids mixing both on the same handler
            line = next(inF, "")
            if not line or N <= 0:
                # N <= 0 cannot make progress line by line: copy everything
                shutil.copyfile(fileName, "%s-%s%s" % (filePrefix, 1, fileExt))
                return
            fileNb = 1
            while line:
                with open("%s-%s%s" % (filePrefix, fileNb, fileExt), "w") as outF:
                    outF.write(line)
                    outF.writelines(itertools.islice(inF, N - 1))
                line = next(inF, "")
                fileNb += 1