Mercurial > repos > yufei-luo > s_mart
comparison commons/core/utils/FileUtils.py @ 38:2c0c0a89fad7
Uploaded
| author | m-zytnicki |
|---|---|
| date | Thu, 02 May 2013 09:56:47 -0400 |
| parents | 44d5973c188c |
| children |
comparison
equal
deleted
inserted
replaced
| 37:d22fadc825e3 | 38:2c0c0a89fad7 |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 | |
| 32 import os | |
| 33 import glob | |
| 34 import shutil | |
| 35 import sys | |
| 36 import re | |
| 37 import math | |
| 38 try: | |
| 39 import hashlib | |
| 40 except: | |
| 41 pass | |
| 42 | |
| 43 | |
class FileUtils(object):
    """Static helpers to count, compare, concatenate, clean up and split files.

    All methods are static: call them as FileUtils.method(...).
    Files produced by the split methods are written in the current
    working directory.
    """

    ## Return the number of lines in the given file
    #
    # A last line made only of an end-of-line symbol (i.e. a trailing
    # blank line) is not counted.
    #
    # @param fileName string name of the file
    #
    # @return int number of lines
    #
    @staticmethod
    def getNbLinesInSingleFile(fileName):
        nbLines = 0
        lastLine = None
        # stream the file instead of loading all its lines in memory
        with open(fileName, "r") as fileHandler:
            for lastLine in fileHandler:
                nbLines += 1
        if lastLine == "\n":
            return nbLines - 1
        return nbLines

    ## Return the number of lines in the files in the given list
    #
    # @param lFileNames list of file names
    #
    # @return int sum of the line counts of all the files
    #
    @staticmethod
    def getNbLinesInFileList(lFileNames):
        return sum(FileUtils.getNbLinesInSingleFile(fileName) for fileName in lFileNames)

    ## Return True if the given file exists, False otherwise
    #
    # @param fileName string path of the file (or directory)
    #
    @staticmethod
    def isRessourceExists(fileName):
        return os.path.exists(fileName)

    ## Return True if the given file is empty, False otherwise
    #
    # @note a file containing a single blank line is considered empty
    #
    @staticmethod
    def isEmpty(fileName):
        return 0 == FileUtils.getNbLinesInSingleFile(fileName)

    ## Return True if both files are identical, False otherwise
    #
    # The comparison is done byte by byte in Python: no external 'diff'
    # command, no shell and no temporary file are involved.
    #
    # @param file1 string name of the first file
    # @param file2 string name of the second file
    #
    # @return bool False as well (with a warning) if a file cannot be read
    #
    @staticmethod
    def are2FilesIdentical(file1, file2):
        chunkSize = 65536
        try:
            with open(file1, "rb") as handler1:
                with open(file2, "rb") as handler2:
                    while True:
                        chunk1 = handler1.read(chunkSize)
                        chunk2 = handler2.read(chunkSize)
                        if chunk1 != chunk2:
                            return False
                        if not chunk1:
                            # both files reached their end at the same time
                            return True
        except (IOError, OSError) as error:
            print("WARNING: can't compare '%s' and '%s' (%s)" % (file1, file2, error))
            return False

    ## Return a string with all the content of the files in the given list
    #
    # Files are read in sorted order of their names; the given list itself
    # is left untouched.
    #
    # @param lFiles list of file names
    #
    @staticmethod
    def getFileContent(lFiles):
        lContents = []
        for fileName in sorted(lFiles):
            with open(fileName, "r") as currentFile:
                lContents.append(currentFile.read())
        return "".join(lContents)

    ## Save content of the given file after having sorted it
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite inFile)
    #
    @staticmethod
    def sortFileContent(inFile, outFile=""):
        with open(inFile, "r") as inFileHandler:
            lines = sorted(inFileHandler.readlines())
        if outFile == "":
            outFile = inFile
        with open(outFile, "w") as outFileHandler:
            outFileHandler.writelines(lines)

    ## Add end-of-line symbol to the given file content if necessary
    #
    # @param fileContent string
    #
    # @return string the content, ending with an end-of-line unless it is empty
    #
    @staticmethod
    def addNewLineAtTheEndOfFileContent(fileContent):
        if fileContent and not fileContent.endswith('\n'):
            fileContent += '\n'
        return fileContent

    ## Concatenate files in the given list
    #
    # The data are appended to the output file if it already exists.
    #
    # @param lFiles list of input file names (left untouched)
    # @param outFile string name of the output file
    # @param sort bool concatenate the files in sorted order of their names
    # @param skipHeaders bool skip the first line of each input file
    # @param separator string written between two consecutive files
    #
    @staticmethod
    def catFilesFromList(lFiles, outFile, sort=True, skipHeaders=False, separator=""):
        if sort:
            lFiles = sorted(lFiles)
        with open(outFile, "a") as outFileHandler:
            for index, singleFile in enumerate(lFiles):
                if index > 0:
                    outFileHandler.write(separator)
                with open(singleFile, "r") as singleFileHandler:
                    if skipHeaders:
                        singleFileHandler.readline()
                    shutil.copyfileobj(singleFileHandler, outFileHandler)

    ## Concatenate files according to the given pattern
    #
    # @param pattern string glob pattern of the input files
    # @param outFile string name of the output file
    # @param skipHeaders bool skip the first line of each input file
    # @param separator string written between two consecutive files
    #
    @staticmethod
    def catFilesByPattern(pattern, outFile, skipHeaders=False, separator=""):
        lFiles = glob.glob(pattern)
        FileUtils.catFilesFromList(lFiles, outFile, skipHeaders=skipHeaders, separator=separator)

    ## Remove files listed according to the given pattern
    #
    # @example prefix="/home/tmp/dummy*.txt"
    #
    @staticmethod
    def removeFilesByPattern(prefix):
        for f in glob.glob(prefix):
            os.remove(f)

    ## Remove files listed according to the suffixes in the given list
    #
    # @param targetPath string non-empty path of the directory to clean
    # @param lSuffixes list of suffixes, e.g. [".tmp", ".log"]
    #
    @staticmethod
    def removeFilesBySuffixList(targetPath, lSuffixes):
        # NOTE: an empty targetPath raises IndexError; this is kept on
        # purpose, otherwise the pattern would become "/*suffix"
        if targetPath[-1] == "/":
            targetPath = targetPath[:-1]
        for suffix in lSuffixes:
            pattern = "%s/*%s" % (targetPath, suffix)
            FileUtils.removeFilesByPattern(pattern)

    ## Rewrite a file line by line through a temporary file
    #
    # The result is first written in '<outFile>.tmp' and only then moved
    # onto outFile, so that inFile is never lost if something goes wrong
    # (inFile and outFile can be the same file).
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file
    # @param convertLine function taking and returning a line (as bytes)
    #
    @staticmethod
    def _rewriteLines(inFile, outFile, convertLine):
        tmpFile = "%s.tmp" % (outFile)
        # open the input first: if it is missing, nothing else is touched
        inFileHandler = open(inFile, "rb")
        try:
            try:
                with open(tmpFile, "wb") as tmpFileHandler:
                    for line in inFileHandler:
                        tmpFileHandler.write(convertLine(line))
            finally:
                inFileHandler.close()
            shutil.move(tmpFile, outFile)
        finally:
            # only true when a step above failed
            if os.path.exists(tmpFile):
                os.remove(tmpFile)

    ## Remove repeated blanks in the given file
    #
    # Each run of several spaces is replaced by a single space (same
    # result as "tr -s ' '", but without any shell command).
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite inFile)
    #
    @staticmethod
    def removeRepeatedBlanks(inFile, outFile=""):
        if outFile == "":
            outFile = inFile
        repeatedBlanks = re.compile(b" {2,}")
        FileUtils._rewriteLines(inFile, outFile, lambda line: repeatedBlanks.sub(b" ", line))

    ## Remove files in the given list
    #
    @staticmethod
    def removeFilesFromList(lFiles):
        for f in lFiles:
            os.remove(f)

    ## Remove files in the given list if exist
    #
    @staticmethod
    def removeFilesFromListIfExist(lFiles):
        for fileName in lFiles:
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    ## Append the content of a file to another file
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file
    #
    @staticmethod
    def appendFileContent(inFile, outFile):
        with open(outFile, "a") as outFileHandler:
            with open(inFile, "r") as inFileHandler:
                shutil.copyfileobj(inFileHandler, outFileHandler)

    ## Replace Windows end-of-line by Unix end-of-line
    #
    # The file is handled as bytes, hence only the "\r\n" sequences are
    # modified, whatever the platform and the encoding.
    #
    # @param inFile string name of the file to convert (in place)
    #
    @staticmethod
    def fromWindowsToUnixEof(inFile):
        FileUtils._rewriteLines(inFile, inFile, lambda line: line.replace(b"\r\n", b"\n"))

    ## Remove duplicated lines in a file
    #
    # @note it preserves the initial order and handles blank lines
    #
    # @param inFile string name of the file to clean (in place)
    #
    @staticmethod
    def removeDuplicatedLines(inFile):
        with open(inFile, "r") as inFileHandler:
            lLines = inFileHandler.read().split("\n")
        # the split gives an empty last item when the file ends with "\n"
        if lLines[-1] == "":
            del lLines[-1]

        sSeenLines = set()
        lUniqueLines = []
        for line in lLines:
            if line not in sSeenLines:
                sSeenLines.add(line)
                lUniqueLines.append("%s\n" % (line))

        with open(inFile, "w") as outFileHandler:
            outFileHandler.writelines(lUniqueLines)

    ## Write a list of lines in a given file
    #
    # @param inFile string name of the file to write
    # @param lLines list of strings, written as they are (no end-of-line added)
    #
    @staticmethod
    def writeLineListInFile(inFile, lLines):
        with open(inFile, "w") as inFileHandler:
            inFileHandler.writelines(lLines)

    ## Give the list of absolute path of each directory in the given directory
    #
    # @param rootPath string absolute path of the given directory
    #
    # @return lDirPath list of absolute directory path
    #
    @staticmethod
    def getAbsoluteDirectoryPathList(rootPath):
        return [ressource for ressource in glob.glob(rootPath + "/*") if os.path.isdir(ressource)]

    ## Get a sublist of which each element matches/doesn't match a pattern
    #
    # @param lPath string list of paths
    #
    # @param pattern string pattern (regular expression, searched as ".*pattern.*")
    #
    # @param match bool keep the matching paths if True, the other ones if False
    #
    # @return lPathMatching list of path matching pattern
    #
    @staticmethod
    def getSubListAccordingToPattern(lPath, pattern, match=True):
        # compiled once, instead of once per path
        regex = re.compile(".*%s.*" % pattern)
        keepMatching = bool(match)
        return [path for path in lPath if (regex.match(path) is not None) == keepMatching]

    ## Give the list of file names found in the given directory
    #
    # @param dirPath string absolute path of the given directory
    # @param patternFileFilter string regular expression the file names have to match
    #
    # @return lFilesInDir list of file names
    #
    @staticmethod
    def getFileNamesList(dirPath, patternFileFilter=".*"):
        fileFilter = re.compile(patternFileFilter)
        lFilesInDir = []
        for ressource in glob.glob(dirPath + "/*"):
            if os.path.isfile(ressource):
                fileName = os.path.basename(ressource)
                if fileFilter.match(fileName):
                    lFilesInDir.append(fileName)
        return lFilesInDir

    ## Return the MD5 sum of a file
    #
    # The file is read as bytes, by chunks, so that the sum is the one of
    # the exact content of the file.
    #
    # @param inFile string name of the file
    #
    # @return string hexadecimal digest, or "" if 'hashlib' is not available
    #
    @staticmethod
    def getMd5SecureHash(inFile):
        if "hashlib" not in sys.modules:
            return ""
        md5 = hashlib.md5()
        with open(inFile, "rb") as inFileHandler:
            for chunk in iter(lambda: inFileHandler.read(65536), b""):
                md5.update(chunk)
        return md5.hexdigest()

    ## Cat all files of a given directory
    #
    # @param dir string directory name
    # @param outFileName string output file name
    #
    @staticmethod
    def catFilesOfDir(dir, outFileName):
        lFilePaths = [dir + "/" + fileName for fileName in FileUtils.getFileNamesList(dir)]
        FileUtils.catFilesFromList(lFilePaths, outFileName)

    ## Return True if size file > 0 octet
    #
    # @param fileName string file name
    #
    @staticmethod
    def isSizeNotNull(fileName):
        return os.path.getsize(fileName) > 0

    ## Split one file into N Files by lines
    #
    # The output files are named '<prefix>-<i><extension>' (i from 1 to N)
    # and written in the current directory. N is lowered to the number of
    # lines if needed; a single file is written if N or the number of
    # lines is 0.
    #
    # @param fileName string file name
    # @param N int number of files to create
    #
    @staticmethod
    def splitFileIntoNFiles(fileName, N):
        nbLine = FileUtils.getNbLinesInSingleFile(fileName)
        nbLinesInEachFile = nbLine
        if N > nbLine:
            N = nbLine
        if N != 0:
            # integer version of ceil(nbLine / N)
            nbLinesInEachFile = -(-nbLine // N)
        else:
            N = 1
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName, "r") as fileHandler:
            for i in range(1, N + 1):
                with open("%s-%s%s" % (filePrefix, i, fileExt), "w") as f:
                    for _ in range(nbLinesInEachFile):
                        # readline() returns "" once the end of file is reached
                        f.write(fileHandler.readline())

    ## Split one file into files of N lines
    #
    # The output files are named '<prefix>-<i><extension>' (i starting at 1)
    # and written in the current directory. If the input file is empty or if
    # N is not positive, the input file is simply copied in a single file.
    #
    # @param fileName string input file name
    # @param N int lines number per files
    #
    @staticmethod
    def splitFileAccordingToLineNumber(fileName, N):
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName) as inF:
            fileNb = 1
            line = inF.readline()
            # 'N <= 0': a negative N would otherwise never consume any line
            if not line or N <= 0:
                outFileName = "%s-%s%s" % (filePrefix, fileNb, fileExt)
                with open(fileName, "rb") as wholeFile:
                    with open(outFileName, "wb") as f:
                        shutil.copyfileobj(wholeFile, f)
            else:
                while line:
                    outFileName = "%s-%s%s" % (filePrefix, fileNb, fileExt)
                    with open(outFileName, "w") as outF:
                        lineNb = 1
                        while lineNb <= N and line:
                            outF.write(line)
                            line = inF.readline()
                            lineNb += 1
                    fileNb += 1
