annotate directory_copier.py @ 0:ccabef3f7d5f draft

Uploaded first version
author brenninc
date Sun, 08 May 2016 11:01:03 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
1 import gzip
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
2 import optparse # using optparse as hydra still python 2.6
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
3 import os.path
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
4 import shutil
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
5 import sys
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
6
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def report_error(*args):
    """Write an error message to stderr and stop the program.

    Every argument is converted with str() and the pieces are joined with a
    single space. After the message has been flushed the process exits with
    status 1, so this function never returns to its caller.
    """
    message = ' '.join([str(part) for part in args])
    sys.stderr.write(message + '\n')
    sys.stderr.flush()
    sys.exit(1)
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
11
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
12
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def get_tool_data(name):
    """Return the path of the file called name in the "tool-data" folder.

    The "tool-data" folder is looked up in the directory that holds this
    script (with symlinks resolved). When no regular file of that name is
    there, the problem is passed to report_error, which ends the program.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    data_file = os.path.join(script_dir, "tool-data", name)
    if os.path.isfile(data_file):
        return data_file
    report_error(name,"file not found in tool's tool-data folder. Please ask you galaxy admin to add it back")
    return data_file
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
19
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
20
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def check_white_list(path_to_check):
    """Make sure path_to_check starts with an entry of white-list.ini.

    Each non-blank line of the white list is treated as a plain string
    prefix: the path is accepted as soon as it starts with one of them.

    path_to_check -- directory path requested by the user

    Returns True when the path is accepted. In every other case the problem
    is passed to report_error, which ends the program.
    """
    # The white list is a pure string-prefix test, so "<allowed>/../other/"
    # would pass it while os.listdir would read outside the allowed tree.
    # Refuse any ".." component before looking at the prefixes.
    if ".." in path_to_check.split("/"):
        report_error(path_to_check,"contains a '..' component so it can not be checked against the white list. Please use the full path.")
    white_list = get_tool_data("white-list.ini")
    with open(white_list, 'r') as white_list_file:
        for line in white_list_file:
            line = line.strip()
            # Blank lines are skipped: an empty prefix would match everything.
            if line and path_to_check.startswith(line):
                return True
    report_error(path_to_check,"has not been included in the white list. Please contact the local galaxy admin to add it.")
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
29
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
30
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def check_black_list(path_to_check):
    """Make sure path_to_check contains no entry of black-list.ini.

    Each non-blank line of the black list is searched for anywhere inside
    the path (substring test). A hit is passed to report_error, which ends
    the program. Returns True when the end of the list is reached.
    """
    black_list = get_tool_data("black-list.ini")
    with open(black_list, 'r') as black_list_file:
        entries = [raw.strip() for raw in black_list_file]
    for entry in entries:
        if entry and entry in path_to_check:
            report_error(entry,"has been black list so",path_to_check,"is not allowed. Please contact the local galaxy admin to change that, or add a symlink.")
    return True
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
39
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
40
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def check_pattern_get_new_name(a_file, ending, options):
    """Apply the --start / --last filters and work out the output name.

    a_file  -- file name without directory; expected to end with ending
               (the slicing below relies on that)
    ending  -- the ending that matched, with or without a leading "."
    options -- parsed options; start, last, new_ending and decompress are read

    Returns None when the name fails the --start or --last filter.
    Otherwise returns the name to use for the file:
      * with --new_ending: the name with ending replaced by the new ending,
      * else with --decompress: the name without a trailing ".gz",
      * else the name unchanged.
    """
    if options.start and not a_file.startswith(options.start):
        return None
    if options.last:
        # Insert a "." between --last and the ending only when neither side
        # supplies one. startswith/endswith keep an empty ending from raising.
        if not ending or ending.startswith(".") or options.last.endswith("."):
            required_tail = options.last + ending
        else:
            required_tail = options.last + "." + ending
        if not a_file.endswith(required_tail):
            return None
    if options.new_ending:
        # Slice by explicit length: a_file[:-len(ending)] is "" when the
        # ending is empty, which would throw the whole name away.
        name = a_file[:len(a_file) - len(ending)]
        # Avoid a doubled "." when both the stem and the new ending have one.
        # endswith is safe when the file name is nothing but the ending.
        if options.new_ending.startswith(".") and name.endswith("."):
            name = name[:-1]
        return name + options.new_ending
    if options.decompress and a_file.endswith(".gz"):
        return a_file[:-3]
    return a_file
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
65
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
66
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def check_and_get_new_name(a_file, options):
    """Return the output name for a_file, or None when it is not wanted.

    The first entry of options.endings that a_file ends with decides: the
    name and that ending are handed to check_pattern_get_new_name and its
    result is returned as is. Later endings are not tried, even when that
    result is None. With no matching ending the result is None.
    """
    matched = next(
        (candidate for candidate in options.endings
         if a_file.endswith(candidate)),
        None)
    if matched is None:
        return None
    return check_pattern_get_new_name(a_file, matched, options)
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
72
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
73
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def link(a_file, new_name, path):
    """Hard-link a_file from directory path into "output" as new_name.

    Both directories are resolved with os.path.realpath first; "output" is
    taken relative to the current working directory.
    """
    source = os.path.join(os.path.realpath(path), a_file)
    target = os.path.join(os.path.realpath("output"), new_name)
    # No check for an existing target is made here; os.link is left to fail.
    os.link(source, target)
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
79
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
80
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def decompress(a_file, new_name, path):
    """Gunzip a_file from directory path into "output" as new_name.

    a_file   -- name of the gzip file inside path
    new_name -- name of the decompressed file to write inside "output"
    path     -- directory holding a_file

    Both directories are resolved with os.path.realpath; "output" is taken
    relative to the current working directory. An existing target file is
    overwritten.
    """
    source = os.path.join(os.path.realpath(path), a_file)
    target = os.path.join(os.path.realpath("output"), new_name)
    # try/finally instead of "with gzip.open(...) as a, open(...) as b":
    # both the multi-item with statement and GzipFile as a context manager
    # need Python 2.7+, while this script is meant to run on 2.6 as well.
    compressed = gzip.open(source, 'rb')
    try:
        with open(target, 'wb') as plain:
            shutil.copyfileobj(compressed, plain)
    finally:
        compressed.close()
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
86
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
87
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
def copy_and_link(path, options):
    """List the wanted entries of path and link or decompress them.

    Creates the "output" directory in the current working directory, then
    walks the entries of path in sorted order. For every entry that
    check_and_get_new_name gives a non-empty name, that name is written as
    one line to the file options.list. After that:
      * with options.decompress, ".gz" entries are decompressed into
        "output" and all other entries are linked there,
      * otherwise, with options.link, the entry is linked into "output",
      * otherwise nothing more is done for the entry.
    """
    os.mkdir("output")
    with open(options.list, 'w') as list_file:
        for a_file in sorted(os.listdir(path)):
            new_name = check_and_get_new_name(a_file, options)
            if not new_name:
                continue
            list_file.write(new_name + "\n")
            if options.decompress and a_file.endswith(".gz"):
                decompress(a_file, new_name, path)
            elif options.decompress or options.link:
                link(a_file, new_name, path)
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
105
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
106
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
if __name__ == '__main__':
    # Command line entry point: parse the options, check the requested
    # directory against the white and black lists, then list the matching
    # files and link / decompress them as requested.
    parser = optparse.OptionParser()
    parser.add_option("--path", action="store", type="string",
                      help="Path of directory to check. ")
    parser.add_option("--ending", action="append", type="string", dest="endings",
                      help="Ending that can be listed and if requested linked or decompressed. ")
    parser.add_option("--start", action="store", type="string",
                      help="String that must be at the start of the file name ")
    parser.add_option("--last", action="store", type="string",
                      help="String that must be the last bit of the file name before the endings")
    parser.add_option("--new_ending", action="store", type="string",
                      help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
    #parser.add_option("--regex", action="store", type="string",
    #                  help="Regex pattern the file name (less . ending) must match before the endings")
    parser.add_option("--list", action="store", type="string",
                      help="Path to where all files should be listed. ")
    parser.add_option("--link", action="store_true", default=False,
                      help="If set will cause links to be added in output directory. ")
    parser.add_option("--decompress", action="store_true", default=False,
                      help="If set will cause gz files to be decompressed or if not a supported decompression ending linked.")
    (options, args) = parser.parse_args()

    # Stop with a usage message rather than a traceback when an option the
    # code below cannot work without is missing. parser.error exits.
    if not options.path or not options.path.strip():
        parser.error("--path is required")
    if not options.endings:
        parser.error("at least one --ending is required")
    if not options.list:
        parser.error("--list is required")

    path = options.path.strip()
    # The white list is a prefix test, so always compare with a trailing "/".
    if not path.endswith('/'):
        path = path + "/"
    check_white_list(path)
    # Single pre-formatted argument: same output under Python 2 and 3.
    print("%s white listed" % path)
    check_black_list(path)
    print("%s not black listed" % path)
    copy_and_link(path, options)
ccabef3f7d5f Uploaded first version
brenninc
parents:
diff changeset
138