Mercurial > repos > yufei-luo > s_mart
comparison commons/core/utils/FileUtils.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children | 94ab73e8a190 |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 | |
| 32 import os | |
| 33 import glob | |
| 34 import shutil | |
| 35 import sys | |
| 36 import re | |
| 37 import math | |
| 38 try: | |
| 39 import hashlib | |
| 40 except: | |
| 41 pass | |
| 42 | |
| 43 | |
## Collection of static helper methods to count, compare, concatenate,
#  clean up, hash and split files.
#
class FileUtils(object):

    ## Return the number of lines in the given file
    #
    # A last line made only of an end-of-line character is not counted.
    #
    # @param fileName string name of the file to read
    #
    # @return int number of lines
    #
    @staticmethod
    def getNbLinesInSingleFile(fileName):
        nbLines = 0
        lastLine = None
        # Stream the file rather than loading all its lines in memory.
        with open(fileName, "r") as fileHandler:
            for line in fileHandler:
                nbLines += 1
                lastLine = line
        if lastLine == "\n":
            nbLines -= 1
        return nbLines

    ## Return the number of lines in the files in the given list
    #
    # @param lFileNames list of file names
    #
    # @return int sum of the line counts given by getNbLinesInSingleFile
    #
    @staticmethod
    def getNbLinesInFileList(lFileNames):
        return sum(FileUtils.getNbLinesInSingleFile(fileName) for fileName in lFileNames)

    ## Return True if the given path exists, False otherwise
    #
    # @param fileName string path to check
    #
    @staticmethod
    def isRessourceExists(fileName):
        return os.path.exists(fileName)

    ## Return True if the given file is empty, False otherwise
    #
    # @note emptiness is decided by getNbLinesInSingleFile, hence a file
    # holding a single end-of-line character is reported as empty
    #
    # @param fileName string name of the file to check
    #
    @staticmethod
    def isEmpty(fileName):
        return 0 == FileUtils.getNbLinesInSingleFile(fileName)

    ## Return True if both files are identical, False otherwise
    #
    # The files are compared byte by byte, without any external command
    # nor temporary file.
    #
    # @param file1 string name of the first file
    # @param file2 string name of the second file
    #
    # @return bool False as well (with a message on stderr) when one of the
    # files cannot be read
    #
    @staticmethod
    def are2FilesIdentical(file1, file2):
        chunkSize = 65536
        try:
            with open(file1, "rb") as fileHandler1:
                with open(file2, "rb") as fileHandler2:
                    while True:
                        chunk1 = fileHandler1.read(chunkSize)
                        chunk2 = fileHandler2.read(chunkSize)
                        if chunk1 != chunk2:
                            return False
                        # Both chunks are equal: empty means both files ended.
                        if not chunk1:
                            return True
        except (IOError, OSError) as error:
            msg = "ERROR: cannot compare '%s' and '%s': %s" % (file1, file2, error)
            sys.stderr.write("%s\n" % msg)
            sys.stderr.flush()
            return False

    ## Return a string with all the content of the files in the given list
    #
    # Files are read in sorted name order; the given list is left untouched.
    #
    # @param lFiles list of file names
    #
    # @return string concatenated content
    #
    @staticmethod
    def getFileContent(lFiles):
        lContents = []
        for fileName in sorted(lFiles):
            with open(fileName, "r") as currentFile:
                lContents.append(currentFile.read())
        return "".join(lContents)

    ## Save content of the given file after having sorted it
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite inFile)
    #
    @staticmethod
    def sortFileContent(inFile, outFile=""):
        with open(inFile, "r") as inFileHandler:
            lines = inFileHandler.readlines()
        lines.sort()
        if outFile == "":
            outFile = inFile
        with open(outFile, "w") as outFileHandler:
            outFileHandler.writelines(lines)

    ## Add end-of-line symbol to the given file content if necessary
    #
    # @param fileContent string content to check
    #
    # @return string content ending with an end-of-line, unless it is empty
    #
    @staticmethod
    def addNewLineAtTheEndOfFileContent(fileContent):
        if fileContent and not fileContent.endswith("\n"):
            fileContent += "\n"
        return fileContent

    ## Concatenate files in the given list
    #
    # The output file is opened in append mode, so existing content is kept.
    #
    # @param lFiles list of input file names (never modified)
    # @param outFile string name of the output file
    # @param sort bool process the input files in sorted name order
    # @param skipHeaders bool drop the first line of each input file
    # @param separator string written between two consecutive input files
    #
    @staticmethod
    def catFilesFromList(lFiles, outFile, sort=True, skipHeaders=False, separator=""):
        if sort:
            # Sort a copy: the caller's list must not be reordered.
            lFiles = sorted(lFiles)
        with open(outFile, "a") as outFileHandler:
            for index, singleFile in enumerate(lFiles):
                if index > 0:
                    outFileHandler.write(separator)
                with open(singleFile, "r") as singleFileHandler:
                    if skipHeaders:
                        singleFileHandler.readline()
                    shutil.copyfileobj(singleFileHandler, outFileHandler)

    ## Concatenate files according to the given pattern
    #
    # @param pattern string glob pattern selecting the input files
    # @param outFile string name of the output file
    # @param skipHeaders bool drop the first line of each input file
    # @param separator string written between two consecutive input files
    #
    @staticmethod
    def catFilesByPattern(pattern, outFile, skipHeaders=False, separator=""):
        lFiles = glob.glob(pattern)
        FileUtils.catFilesFromList(lFiles, outFile, skipHeaders=skipHeaders, separator=separator)

    ## Remove files listed according to the given pattern
    #
    # @example prefix="/home/tmp/dummy*.txt"
    #
    # @param prefix string glob pattern selecting the files to remove
    #
    @staticmethod
    def removeFilesByPattern(prefix):
        for fileName in glob.glob(prefix):
            os.remove(fileName)

    ## Remove files listed according to the suffixes in the given list
    #
    # @param targetPath string directory holding the files (one trailing "/"
    # is ignored; an empty string raises IndexError)
    # @param lSuffixes list of file name suffixes
    #
    @staticmethod
    def removeFilesBySuffixList(targetPath, lSuffixes):
        if targetPath[-1] == "/":
            targetPath = targetPath[:-1]
        for suffix in lSuffixes:
            FileUtils.removeFilesByPattern("%s/*%s" % (targetPath, suffix))

    ## Remove repeated blanks in the given file
    #
    # Each run of consecutive space characters is replaced by a single space.
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file (default: overwrite inFile)
    #
    @staticmethod
    def removeRepeatedBlanks(inFile, outFile=""):
        if outFile == "":
            outFile = inFile
        # Work on bytes so that the content is never decoded nor re-encoded.
        with open(inFile, "rb") as inFileHandler:
            content = inFileHandler.read()
        with open(outFile, "wb") as outFileHandler:
            outFileHandler.write(re.sub(b" +", b" ", content))

    ## Remove files in the given list
    #
    # @param lFiles list of file names
    #
    @staticmethod
    def removeFilesFromList(lFiles):
        for fileName in lFiles:
            os.remove(fileName)

    ## Remove files in the given list if exist
    #
    # @param lFiles list of file names
    #
    @staticmethod
    def removeFilesFromListIfExist(lFiles):
        for fileName in lFiles:
            if FileUtils.isRessourceExists(fileName):
                os.remove(fileName)

    ## Append the content of a file to another file
    #
    # @param inFile string name of the input file
    # @param outFile string name of the output file
    #
    @staticmethod
    def appendFileContent(inFile, outFile):
        with open(inFile, "r") as inFileHandler:
            with open(outFile, "a") as outFileHandler:
                shutil.copyfileobj(inFileHandler, outFileHandler)

    ## Replace Windows end-of-line by Unix end-of-line
    #
    # The converted content is written in "<inFile>.tmp", which then takes
    # the place of the input file.
    #
    # @param inFile string name of the file to convert in place
    #
    @staticmethod
    def fromWindowsToUnixEof(inFile):
        tmpFile = "%s.tmp" % (inFile)
        # Binary mode keeps "\r\n" visible whatever the platform or version.
        with open(inFile, "rb") as inFileHandler:
            with open(tmpFile, "wb") as tmpFileHandler:
                for line in inFileHandler:
                    tmpFileHandler.write(line.replace(b"\r\n", b"\n"))
        # The original is only replaced once the conversion is complete.
        shutil.move(tmpFile, inFile)

    ## Remove duplicated lines in a file
    #
    # @note it preserves the initial order and handles blank lines
    #
    # @param inFile string name of the file to rewrite in place
    #
    @staticmethod
    def removeDuplicatedLines(inFile):
        with open(inFile, "r") as inFileHandler:
            lLines = inFileHandler.read().split("\n")
        # The split yields a last empty item when the content ends with "\n".
        if lLines[-1] == "":
            del lLines[-1]
        sSeenLines = set()
        lUniqueLines = []
        for line in lLines:
            if line not in sSeenLines:
                sSeenLines.add(line)
                lUniqueLines.append(line)
        with open(inFile, "w") as inFileHandler:
            inFileHandler.writelines("%s\n" % (line) for line in lUniqueLines)

    ## Write a list of lines in a given file
    #
    # @param inFile string name of the file to write (overwritten)
    # @param lLines list of strings, written as they are (no end-of-line added)
    #
    @staticmethod
    def writeLineListInFile(inFile, lLines):
        with open(inFile, "w") as inFileHandler:
            inFileHandler.writelines(lLines)

    ## Give the list of absolute path of each directory in the given directory
    #
    # @param rootPath string absolute path of the given directory
    #
    # @return lDirPath list of absolute directory path
    #
    @staticmethod
    def getAbsoluteDirectoryPathList(rootPath):
        return [ressource for ressource in glob.glob(rootPath + "/*") if os.path.isdir(ressource)]

    ## Get a sublist of which each element matches/doesn't match a pattern
    #
    # @param lPath string list of paths
    #
    # @param pattern string pattern (regular expression inserted between
    # two ".*" and matched from the beginning of each path)
    #
    # @param match bool keep the matching paths if True, the others if False
    #
    # @return lPathMatching list of path matching pattern
    #
    @staticmethod
    def getSubListAccordingToPattern(lPath, pattern, match=True):
        # Compile once instead of once per path.
        regex = re.compile(".*%s.*" % pattern)
        keepMatching = bool(match)
        return [path for path in lPath if (regex.match(path) is not None) == keepMatching]

    ## Give the list of file names found in the given directory
    #
    # @param dirPath string absolute path of the given directory
    # @param patternFileFilter string regular expression matched from the
    # beginning of each file name
    #
    # @return lFilesInDir list of file names
    #
    @staticmethod
    def getFileNamesList(dirPath, patternFileFilter=".*"):
        regex = re.compile(patternFileFilter)
        lFilesInDir = []
        for ressource in glob.glob(dirPath + "/*"):
            if os.path.isfile(ressource):
                fileName = os.path.basename(ressource)
                if regex.match(fileName):
                    lFilesInDir.append(fileName)
        return lFilesInDir

    ## Return the MD5 sum of a file
    #
    # @param inFile string name of the file to hash
    #
    # @return string hexadecimal digest, or "" if hashlib is not loaded
    #
    @staticmethod
    def getMd5SecureHash(inFile):
        if "hashlib" not in sys.modules:
            return ""
        md5 = hashlib.md5()
        # The digest is fed with bytes, read by chunks to bound memory usage.
        with open(inFile, "rb") as inFileHandler:
            while True:
                chunk = inFileHandler.read(65536)
                if not chunk:
                    break
                md5.update(chunk)
        return md5.hexdigest()

    ## Cat all files of a given directory
    #
    # @param dir string directory name
    # @param outFileName string output file name
    #
    @staticmethod
    def catFilesOfDir(dir, outFileName):
        lFilePaths = [os.path.join(dir, fileName) for fileName in FileUtils.getFileNamesList(dir)]
        FileUtils.catFilesFromList(lFilePaths, outFileName)

    ## Return True if size file > 0 octet
    #
    # @param fileName string file name
    #
    @staticmethod
    def isSizeNotNull(fileName):
        return os.path.getsize(fileName) > 0

    ## Split one file into N Files by lines
    #
    # Output files are named "<prefix>-<i><extension>" and written in the
    # current directory. Each one receives ceil(nbLines / N) lines until the
    # input is exhausted, so the last ones may be shorter or even empty.
    #
    # @param fileName string file name
    # @param N int number of files to create (lowered to the number of
    # lines when greater; a single file is created when it ends up at 0)
    #
    @staticmethod
    def splitFileIntoNFiles(fileName, N):
        nbLine = FileUtils.getNbLinesInSingleFile(fileName)
        nbLinesInEachFile = nbLine
        if N > nbLine:
            N = nbLine
        if N != 0:
            nbLinesInEachFile = int(math.ceil(float(nbLine) / N))
        else:
            N = 1
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName, "r") as fileHandler:
            for i in range(1, N + 1):
                with open("%s-%s%s" % (filePrefix, i, fileExt), "w") as f:
                    for _ in range(nbLinesInEachFile):
                        # readline() returns "" once the input is exhausted.
                        f.write(fileHandler.readline())

    ## Split one file into files of N lines
    #
    # Output files are named "<prefix>-<i><extension>" and written in the
    # current directory. An empty input, or N lower than 1, gives a single
    # output file which is a copy of the input.
    #
    # @param fileName string input file name
    # @param N int lines number per files
    #
    @staticmethod
    def splitFileAccordingToLineNumber(fileName, N):
        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
        with open(fileName) as inF:
            fileNb = 1
            line = inF.readline()
            # N < 1 cannot consume any line: copy instead of looping forever.
            if not line or N <= 0:
                outFileName = "%s-%s%s" % (filePrefix, fileNb, fileExt)
                with open(fileName, "rb") as srcF:
                    with open(outFileName, "wb") as f:
                        shutil.copyfileobj(srcF, f)
            else:
                while line:
                    outFileName = "%s-%s%s" % (filePrefix, fileNb, fileExt)
                    with open(outFileName, "w") as outF:
                        lineNb = 1
                        while lineNb <= N and line:
                            outF.write(line)
                            line = inF.readline()
                            lineNb += 1
                    fileNb += 1
