0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Fix errors in a dataset.
|
|
5 For now, only removing erroneous lines is supported.
|
|
6
|
|
7 usage: %prog input errorsfile output
|
|
8 -x, --ext: dataset extension (type)
|
|
9 -m, --methods=N: comma separated list of repair methods
|
|
10 """
|
|
11
|
|
12 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
13 from bx.cookbook import doc_optparse
|
|
14
|
|
15 from galaxy import util
|
|
16
|
|
def main():
    """Copy the input dataset to the output, applying the requested repairs.

    Options and positional arguments are parsed from the module docstring
    by ``doc_optparse``. Three positional arguments are required, in this
    order: the input dataset path, the errors file path and the output path.

    The only repair implemented is ``lines``: when ``lines`` appears in the
    comma separated ``--methods`` value, every input line whose 1-based line
    number has at least one error attached is dropped. Without it the input
    is copied to the output unchanged. The ``--ext`` option is accepted on
    the command line but is not consulted by any repair yet.

    Raises:
        IndexError: if fewer than three positional arguments are supplied.
        IOError: if one of the three files cannot be opened.
    """
    options, args = doc_optparse.parse(__doc__)
    # Resolve all three paths up front so a short argument list fails before
    # any file is opened or truncated.
    in_path, error_path, out_path = args[0], args[1], args[2]

    # A missing or non-string --methods value just means "no repairs";
    # anything else (e.g. KeyboardInterrupt) is no longer silently swallowed.
    try:
        methods = options.methods.split(",") if options.methods else []
    except AttributeError:
        methods = []
    remove_lines = "lines" in methods

    # NOTE(review): util.string_to_object presumably deserializes arbitrary
    # objects (the result is iterated for instances carrying ``linenum``) --
    # confirm the errors file only ever comes from a trusted source.
    with open(error_path, "r") as error_file:
        error_list = util.string_to_object(error_file.read())

    # Only membership by line number is needed; errors without a (truthy)
    # line number are not tied to a specific line and are ignored here.
    bad_linenums = set(
        error.linenum for error in error_list if error.linenum
    )

    # The output is opened only after the errors were parsed successfully, so
    # a broken errors file does not leave a truncated output behind.
    with open(in_path, "r") as in_file:
        with open(out_path, "w") as out_file:
            for linenum, line in enumerate(in_file, 1):
                if remove_lines and linenum in bad_linenums:
                    continue
                # other repair methods would transform ``line`` here
                out_file.write(line)
|
|
58
|
|
# Run the repair only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|