annotate commons/core/utils/FileUtils.py @ 69:1473ab954708 draft

Corrected bug in "CollapsedReads" XML file.
author m-zytnicki
date Wed, 18 Nov 2015 10:59:02 -0500
parents 44d5973c188c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
36
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
2 # http://www.inra.fr
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
4 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
9 # "http://www.cecill.info".
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
10 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
15 # liability.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
16 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
26 # same conditions as regards security.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
27 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
30
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
31
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
32 import os
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
33 import glob
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
34 import shutil
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
35 import sys
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
36 import re
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
37 import math
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
38 try:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
39 import hashlib
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
40 except:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
41 pass
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
42
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
43
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
44 class FileUtils( object ):
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
45
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
46 ## Return the number of lines in the given file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
47 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getNbLinesInSingleFile( fileName ):
    """Return the number of lines in the given file.

    A final line made of a single newline character (one trailing blank
    line) is not counted.

    @param fileName string name of the file to read
    @return int number of lines
    """
    nbLines = 0
    lastLine = None
    # Stream the file rather than loading it whole; the context manager
    # closes the handle even when reading fails.
    with open( fileName, "r" ) as fileHandler:
        for lastLine in fileHandler:
            nbLines += 1
    # Same rule as before: ignore one trailing blank line.
    if lastLine == "\n":
        nbLines -= 1
    return nbLines
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
56
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
57 getNbLinesInSingleFile = staticmethod( getNbLinesInSingleFile )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
58
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
59 ## Return the number of lines in the files in the given list
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
60 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getNbLinesInFileList( lFileNames ):
    """Return the total number of lines over all the files of the list.

    Each file is counted with FileUtils.getNbLinesInSingleFile; an empty
    list gives 0.

    @param lFileNames list of file names
    @return int sum of the per-file line counts
    """
    return sum( FileUtils.getNbLinesInSingleFile( name ) for name in lFileNames )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
66
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
67 getNbLinesInFileList = staticmethod( getNbLinesInFileList )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
68
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
69 ## Return True if the given path (file or directory) exists, False otherwise
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
70 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def isRessourceExists( fileName ):
    """Tell whether the given path exists.

    This is a thin wrapper around os.path.exists, so it is also True for
    an existing directory.

    @param fileName string path to check
    @return bool result of os.path.exists
    """
    exists = os.path.exists( fileName )
    return exists
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
73
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
74 isRessourceExists = staticmethod( isRessourceExists )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
75
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
76 ## Return True if the given file has no line (a file holding a single newline counts as empty), False otherwise
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
77 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def isEmpty( fileName ):
    """Tell whether the given file contains no line.

    The decision relies on FileUtils.getNbLinesInSingleFile returning 0.

    @param fileName string name of the file to check
    @return bool True when the line count is 0
    """
    nbLines = FileUtils.getNbLinesInSingleFile( fileName )
    return nbLines == 0
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
80
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
81 isEmpty = staticmethod( isEmpty )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
82
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
83 ## Return True if both files are identical, False otherwise
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
84 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def are2FilesIdentical( file1, file2 ):
    """Return True if both files have the same content, False otherwise.

    The comparison is done in Python (filecmp, content-based) instead of
    spawning 'diff' through a shell: file names with spaces or shell
    metacharacters are safe, no temporary file is written in the current
    directory, and no warning is printed when the files simply differ.

    @param file1 string name of the first file
    @param file2 string name of the second file
    @return bool True if the contents are identical; False if they differ,
            if either path is not a regular file, or if it cannot be read
            (a warning is then written on stdout)
    """
    import filecmp  # local import: only this method needs it

    try:
        # shallow=False forces a content comparison, not just os.stat().
        return filecmp.cmp( file1, file2, shallow=False )
    except ( IOError, OSError ) as error:
        # Keep the historical contract: a failed comparison is reported
        # and answered with False rather than raised.
        sys.stdout.write( "WARNING: cannot compare '%s' and '%s': %s\n" % ( file1, file2, error ) )
        return False
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
99
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
100 are2FilesIdentical = staticmethod( are2FilesIdentical )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
101
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
102 ## Return a string with all the content of the files in the given list
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
103 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getFileContent( lFiles ):
    """Return a string with the content of all the files of the list.

    Files are read in sorted name order. The list given by the caller is
    left untouched (it used to be sorted in place).

    @param lFiles list of file names
    @return string concatenation of the file contents
    """
    lContents = []
    for fileName in sorted( lFiles ):
        with open( fileName, "r" ) as currentFile:
            lContents.append( currentFile.read() )
    # One join instead of repeated '+=' on a growing string.
    return "".join( lContents )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
112
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
113 getFileContent = staticmethod( getFileContent )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
114
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
115 ## Save content of the given file after having sorted it
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
116 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def sortFileContent( inFile, outFile="" ):
    """Save the lines of the given file in sorted order.

    @param inFile string name of the input file
    @param outFile string name of the output file (default: overwrite inFile)

    @note if the last line has no end-of-line character, one is added
          before sorting; otherwise that line would be glued to the line
          following it in the sorted output
    """
    with open( inFile, "r" ) as inFileHandler:
        lines = inFileHandler.readlines()
    if lines and not lines[-1].endswith( "\n" ):
        lines[-1] += "\n"
    lines.sort()
    if outFile == "":
        outFile = inFile
    with open( outFile, "w" ) as outFileHandler:
        outFileHandler.writelines( lines )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
127
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
128 sortFileContent = staticmethod( sortFileContent )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
129
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
130 ## Add end-of-line symbol to the given file content if necessary
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
131 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def addNewLineAtTheEndOfFileContent( fileContent ):
    """Return the content terminated by an end-of-line character.

    An empty content, or a content already ending with a newline, is
    returned unchanged.

    @param fileContent string content to complete
    @return string content ending with a newline (or the empty string)
    """
    if fileContent and not fileContent.endswith( '\n' ):
        return fileContent + '\n'
    return fileContent
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
136
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
137 addNewLineAtTheEndOfFileContent = staticmethod( addNewLineAtTheEndOfFileContent )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
138
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
139 ## Concatenate files in the given list
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
140 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def catFilesFromList( lFiles, outFile, sort=True, skipHeaders = False, separator = "" ):
    """Concatenate the files of the given list at the end of outFile.

    @param lFiles list of input file names (not modified: a sorted copy is
           used when 'sort' is True)
    @param outFile string name of the output file, opened in append mode,
           so existing content is kept
    @param sort bool process the files in sorted name order
    @param skipHeaders bool skip the first line of every input file
    @param separator string written between two consecutive files
    """
    if sort:
        # sorted() works on a copy, so the caller's list keeps its order.
        lFiles = sorted( lFiles )
    with open( outFile, "a" ) as outFileHandler:
        for index, singleFile in enumerate( lFiles ):
            if index > 0:
                outFileHandler.write( separator )
            with open( singleFile, "r" ) as singleFileHandler:
                if skipHeaders:
                    singleFileHandler.readline()
                for line in singleFileHandler:
                    outFileHandler.write( line )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
159
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
160 catFilesFromList = staticmethod( catFilesFromList )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
161
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
162 ## Concatenate files according to the given pattern
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
163 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def catFilesByPattern( pattern, outFile, skipHeaders = False, separator = "" ):
    """Concatenate into outFile the files matching the given glob pattern.

    The matching files are handed over to FileUtils.catFilesFromList.

    @param pattern string glob pattern selecting the input files
    @param outFile string name of the output file
    @param skipHeaders bool forwarded to catFilesFromList
    @param separator string forwarded to catFilesFromList
    """
    lMatchingFiles = glob.glob( pattern )
    dOptions = { "skipHeaders": skipHeaders, "separator": separator }
    FileUtils.catFilesFromList( lMatchingFiles, outFile, **dOptions )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
167
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
168 catFilesByPattern = staticmethod( catFilesByPattern )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
169
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
170 ## Remove files listed according to the given pattern
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
171 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
172 # @example prefix="/home/tmp/dummy*.txt"
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
173 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeFilesByPattern( prefix ):
    """Remove every file matching the given glob pattern.

    @param prefix string glob pattern, e.g. "/home/tmp/dummy*.txt"
    """
    for matchedPath in glob.glob( prefix ):
        os.remove( matchedPath )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
178
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
179 removeFilesByPattern = staticmethod( removeFilesByPattern )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
180
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
181 ## Remove files listed according to the suffixes in the given list
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
182 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeFilesBySuffixList( targetPath, lSuffixes ):
    """Remove from a directory the files ending with one of the suffixes.

    For each suffix, the glob pattern "<targetPath>/*<suffix>" is handed
    to FileUtils.removeFilesByPattern. One trailing "/" of targetPath is
    dropped first.

    @param targetPath string directory to clean (must not be empty: its
           last character is inspected)
    @param lSuffixes list of file name suffixes
    """
    directory = targetPath[:-1] if targetPath[-1] == "/" else targetPath
    for suffix in lSuffixes:
        FileUtils.removeFilesByPattern( "%s/*%s" % ( directory, suffix ) )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
189
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
190 removeFilesBySuffixList = staticmethod( removeFilesBySuffixList )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
191
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
192 ## Remove repeated blanks in the given file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
193 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeRepeatedBlanks( inFile, outFile="" ):
    """Squeeze every run of spaces of the given file into a single space.

    Only the space character is concerned, as with "tr -s ' '"; tabs and
    line ends are left alone. The work is done in Python instead of
    through a shell, so names holding a directory part, spaces or shell
    metacharacters are handled.

    @param inFile string name of the input file
    @param outFile string name of the output file (default: overwrite inFile)
    """
    if outFile == "":
        outFile = inFile
    # The temporary file lives next to the output file; the former name
    # ("tr_<inFile>_<outFile>") was not a usable path as soon as one of
    # the names contained a directory.
    tmpFile = "%s.tmp" % ( outFile )
    # Binary mode: bytes are passed through untouched apart from spaces.
    with open( inFile, "rb" ) as inFileHandler:
        with open( tmpFile, "wb" ) as tmpFileHandler:
            for line in inFileHandler:
                tmpFileHandler.write( re.sub( b" {2,}", b" ", line ) )
    # Put the result in place only once it has been fully written.
    shutil.move( tmpFile, outFile )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
201
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
202 removeRepeatedBlanks = staticmethod( removeRepeatedBlanks )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
203
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
204 ## Remove files in the given list
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
205 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
206 @staticmethod
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeFilesFromList(lFiles):
    """Remove every file of the given list.

    os.remove is called on each name in turn, so a name that cannot be
    removed raises and stops the loop.

    @param lFiles list of file names
    """
    for fileName in lFiles:
        os.remove(fileName)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
210
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
211 ## Remove files in the given list if exist
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
212 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
213 @staticmethod
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeFilesFromListIfExist(lFiles):
    """Remove the files of the given list, skipping names that do not exist.

    Existence is tested with os.path.exists right before each removal.

    @param lFiles list of file names
    """
    for fileName in lFiles:
        if not os.path.exists(fileName):
            continue
        os.remove(fileName)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
218
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
219 ## Append the content of a file to another file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
220 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
221 # @param inFile string name of the input file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
222 # @param outFile string name of the output file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
223 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def appendFileContent( inFile, outFile ):
    """Append the content of a file to another file.

    @param inFile string name of the input file
    @param outFile string name of the output file (created if missing)

    @note the input file is opened first, so an unreadable input raises
          before the output file is created or touched
    """
    with open( inFile, "r" ) as inFileHandler:
        with open( outFile, "a" ) as outFileHandler:
            shutil.copyfileobj( inFileHandler, outFileHandler )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
230
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
231 appendFileContent = staticmethod( appendFileContent )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
232
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
233
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
234 ## Replace Windows end-of-line by Unix end-of-line
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
235 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def fromWindowsToUnixEof( inFile ):
    """Replace Windows end-of-line (CR LF) by Unix end-of-line (LF), in place.

    @param inFile string name of the file to convert

    @note the converted copy is written to "<inFile>.tmp" and moved over
          the original only once complete, so the original is never
          missing if the conversion fails half-way
    """
    tmpFile = "%s.tmp" % ( inFile )
    # Binary mode keeps the interpreter from translating line ends itself,
    # so the result does not depend on the platform or Python version.
    with open( inFile, "rb" ) as inFileHandler:
        with open( tmpFile, "wb" ) as tmpFileHandler:
            for line in inFileHandler:
                tmpFileHandler.write( line.replace( b"\r\n", b"\n" ) )
    shutil.move( tmpFile, inFile )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
250
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
251 fromWindowsToUnixEof = staticmethod( fromWindowsToUnixEof )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
252
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
253
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
254 ## Remove duplicated lines in a file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
255 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
256 # @note it preserves the initial order and handles blank lines
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
257 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def removeDuplicatedLines( inFile ):
    """Remove duplicated lines in a file, in place.

    Only the first occurrence of each line is kept; the initial order is
    preserved and blank lines are handled like any other line. Every line
    written ends with a newline.

    @param inFile string name of the file to clean

    @note the result is written to "<inFile>.tmp" and moved over the
          original only once complete, so the original is never missing
          if something fails half-way
    """
    with open( inFile, "r" ) as inFileHandler:
        lLines = inFileHandler.read().split( "\n" )
    # split() yields a final empty item when the file ends with a newline
    # (or is empty): it is not a line.
    if lLines[-1] == "":
        del lLines[-1]

    tmpFile = "%s.tmp" % ( inFile )
    sSeenLines = set()
    with open( tmpFile, "w" ) as tmpFileHandler:
        for line in lLines:
            if line not in sSeenLines:
                sSeenLines.add( line )
                tmpFileHandler.write( "%s\n" % ( line ) )
    shutil.move( tmpFile, inFile )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
277
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
278 removeDuplicatedLines = staticmethod( removeDuplicatedLines )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
279
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
280
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
281 ## Write a list of lines in a given file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
282 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def writeLineListInFile( inFile, lLines ):
    """Write a list of lines in a given file, replacing its content.

    The lines are written as they are: no end-of-line character is added.

    @param inFile string name of the file to write
    @param lLines list of strings to write
    """
    # The context manager closes the handle even if a write fails
    # (e.g. an item of the list is not a string).
    with open( inFile, "w" ) as inFileHandler:
        inFileHandler.writelines( lLines )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
288
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
289 writeLineListInFile = staticmethod( writeLineListInFile )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
290
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
291
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
292 ## Give the list of absolute path of each directory in the given directory
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
293 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
294 # @param rootPath string absolute path of the given directory
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
295 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
296 # @return lDirPath list of absolute directory path
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
297 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getAbsoluteDirectoryPathList(rootPath):
    """Give the paths of the directories found directly in a directory.

    Entries are listed with the glob pattern "<rootPath>/*" and kept when
    os.path.isdir accepts them; each returned path starts with rootPath.

    @param rootPath string path of the directory to scan
    @return lDirPath list of directory paths
    """
    return [entry for entry in glob.glob(rootPath + "/*") if os.path.isdir(entry)]
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
305
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
306 getAbsoluteDirectoryPathList = staticmethod(getAbsoluteDirectoryPathList)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
307
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
308
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
309 ## Get a sublist of which each element matches/doesn't match a pattern
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
310 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
311 # @param lPath string list of paths
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
312 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
313 # @param pattern string pattern
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
314 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
315 # @param match bool
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
316 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
317 # @return lPathMatching list of path matching pattern
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
318 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getSubListAccordingToPattern(lPath, pattern, match = True):
    """Get the sub-list of paths that match, or do not match, a pattern.

    A path matches when re.match accepts it against ".*<pattern>.*".

    @param lPath list of paths (strings)
    @param pattern string regular expression fragment
    @param match bool keep the matching paths if true, the others if false
    @return lPathMatching list of the selected paths, in their input order
    """
    expression = ".*%s.*" % pattern
    keepMatching = bool(match)
    return [path for path in lPath if bool(re.match(expression, path)) == keepMatching]
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
329
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
330 getSubListAccordingToPattern = staticmethod(getSubListAccordingToPattern)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
331
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
332
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
333 ## Give the list of file names found in the given directory
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
334 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
335 # @param dirPath string absolute path of the given directory
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
336 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
337 # @return lFilesInDir list of file names
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
338 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getFileNamesList( dirPath, patternFileFilter = ".*" ):
    """Give the names of the files found directly in the given directory.

    Entries are listed with the glob pattern "<dirPath>/*"; only regular
    files are kept, and only when re.match accepts their base name
    against patternFileFilter.

    @param dirPath string path of the directory to scan
    @param patternFileFilter string regular expression applied to the names
    @return lFilesInDir list of file names (without the directory part)
    """
    lCandidates = [os.path.basename( entry )
                   for entry in glob.glob( dirPath + "/*" )
                   if os.path.isfile( entry )]
    return [name for name in lCandidates if re.match( patternFileFilter, name )]
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
348
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
349 getFileNamesList = staticmethod( getFileNamesList )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
350
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
351 ## Return the MD5 sum of a file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
352 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def getMd5SecureHash( inFile ):
    """Return the MD5 sum of a file.

    @param inFile string name of the file to hash
    @return string hexadecimal MD5 digest of the file content, or "" when
            the hashlib module could not be loaded
    """
    if "hashlib" not in sys.modules:
        return ""
    md5 = hashlib.md5()
    # Binary mode: the digest must be computed on the raw bytes (text mode
    # may translate line ends, and yields str that md5.update() rejects on
    # Python 3). Fixed-size chunks bound memory use whatever the content.
    with open( inFile, "rb" ) as inFileHandler:
        for chunk in iter( lambda: inFileHandler.read( 65536 ), b"" ):
            md5.update( chunk )
    return md5.hexdigest()
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
366
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
367 getMd5SecureHash = staticmethod( getMd5SecureHash )
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
368
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
369 ## Cat all files of a given directory
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
370 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
371 # @param dir string directory name
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
372 # @param outFileName string output file name
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
373 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def catFilesOfDir(dir, outFileName):
    """Concatenate all files of a directory into a single output file.

    The file names come from FileUtils.getFileNamesList(dir); each one is
    prefixed with the directory and the resulting paths are handed to
    FileUtils.catFilesFromList.

    @param dir string directory name
    @param outFileName string output file name
    """
    lPaths = [dir + "/" + name for name in FileUtils.getFileNamesList(dir)]
    FileUtils.catFilesFromList(lPaths, outFileName)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
380
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
381 catFilesOfDir = staticmethod(catFilesOfDir)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
382
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
383 ## Return True if the file size is greater than 0 bytes
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
384 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
385 # @param fileName string file name
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
386 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def isSizeNotNull(fileName):
    """Tell whether a file is non-empty.

    @param fileName string file name
    @return True if os.path.getsize reports more than 0 bytes, else False
    """
    return os.path.getsize(fileName) > 0
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
392
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
393 isSizeNotNull = staticmethod(isSizeNotNull)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
394
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
395 ## Split one file into N Files by lines
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
396 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
397 # @param fileName string file name
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
398 # @param N int number of files to create
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
399 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
400 @staticmethod
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
def splitFileIntoNFiles(fileName, N):
    """Split one file, line by line, into N numbered files.

    Output files are created relative to the current working directory and
    are named "<prefix>-<i><ext>" with i starting at 1, where prefix and ext
    are taken from the base name of fileName.

    N is capped at the line count reported by
    FileUtils.getNbLinesInSingleFile. If N ends up being 0 (N was 0, or the
    count is 0), a single output file receiving every line is written.
    Each file gets ceil(lines / N) lines, so the last file(s) may hold fewer
    lines or be empty.

    @param fileName string file name
    @param N int number of files to create
    """
    nbLine = FileUtils.getNbLinesInSingleFile(fileName)
    if N > nbLine:
        N = nbLine
    if N != 0:
        nbLinesInEachFile = math.ceil(float(nbLine) / N)
    else:
        N = 1
        nbLinesInEachFile = nbLine
    filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
    # 'with' closes the input even when opening/writing an output file fails.
    with open(fileName, "r") as fileHandler:
        for i in range(1, N + 1):
            with open("%s-%s%s" % (filePrefix, i, fileExt), "w") as f:
                nbWritten = 0
                while nbWritten < nbLinesInEachFile:
                    nbWritten += 1
                    # At end of input readline() returns "", so nothing is written.
                    f.write(fileHandler.readline())
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
419
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
420 ## Split one file into files of N lines
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
421 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
422 # @param fileName string input file name
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
423 # @param N int number of lines per file
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
424 #
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
425 @staticmethod
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
426 def splitFileAccordingToLineNumber(fileName, N):
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
427 filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
428 with open(fileName) as inF:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
429 fileNb = 1
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
430 line = inF.readline()
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
431 if not line or N == 0:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
432 outFileName = "%s-%s%s" %(filePrefix, fileNb, fileExt)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
433 f = open(outFileName, "wb")
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
434 shutil.copyfileobj(open(fileName, "rb"), f)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
435 f.close()
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
436 else:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
437 while line:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
438 outFileName = "%s-%s%s" %(filePrefix, fileNb, fileExt)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
439 with open(outFileName, "w") as outF:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
440 lineNb = 1
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
441 while lineNb <= N and line:
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
442 outF.write(line)
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
443 line = inF.readline()
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
444 lineNb += 1
44d5973c188c Uploaded
m-zytnicki
parents:
diff changeset
445 fileNb += 1