comparison data_manager/path_name_value_key_manager.py @ 1:8495c49cd056 draft default tip

planemo upload for repository https://github.com/LUMC/lumc-galaxy-tools/tree/master/data_manager_select_index_by_path commit 9061997af3bc94f49653ffd42f10b973578e371d
author rhpvorderman
date Mon, 16 Jul 2018 10:58:36 -0400
parents 5f8d9309058b
children
comparison
equal deleted inserted replaced
0:5f8d9309058b 1:8495c49cd056
1 #!/usr/bin/env python 1 #!/usr/bin/env python3
2 2 """Script to create data manager jsons"""
3
4 import argparse
3 import json 5 import json
4 import argparse 6 from pathlib import Path
5 import os 7
6 import yaml 8 import yaml
7 9 from schema import Schema, Optional
8 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): 10
9 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) 11
10 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) 12 def indexes_schema():
11 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) 13 return Schema(
12 return data_manager_dict 14 {'name': str,
13 15 Optional('prefix'): bool,
14 16 Optional('extensions'): [str],
15 def check_param(name, value, default=None, check_tab=True): 17 Optional('prefix_strip_extension'): bool,
16 if value in [ None, '', '?' ]: 18 Optional('extra_columns'): [str],
17 if default: 19 Optional('folder'): [str]})
18 print "Using {0} for {1} as no value provided".format( default, name ) 20
19 value = default 21
20 else: 22 def argument_parser():
21 raise Exception( '{0} is not a valid {1}. You must specify a valid {1}.'.format( value, name ) ) 23 parser = argparse.ArgumentParser()
22 if check_tab and "\t" in value: 24 parser.add_argument('--value', type=str, help='value')
23 raise Exception( '{0} is not a valid {1}. It may not contain a tab because these are used as seperators by galaxy .'.format( value, name ) ) 25 parser.add_argument('--dbkey', type=str, help='dbkey')
24 return value 26 parser.add_argument('--name', type=str, help='name')
25 27 parser.add_argument('--path', type=Path, help='path',
26 def prefix_exists(directory, prefix): 28 required=True)
27 '''checks if files exist with prefix in a directory. Returns Boolean''' 29 parser.add_argument('--data_table_name', action='store', type=str,
28 matched_files = [] 30 help='Name of the data table',
29 directory_files = os.listdir(directory) 31 required=True)
30 for directory_file in directory_files: 32 parser.add_argument('--json_output_file', action='store', type=Path,
31 if directory_file.startswith(prefix): 33 help='Json output file',
32 matched_files.append(directory_file) 34 required=True)
35 parser.add_argument("--extra-columns", type=str,
36 help='Yaml formatted string with extra columns '
37 'and their values. For example '
38 '\'{"with-gtf":"0"}\' for STAR indexes')
39 return parser
40
41
def check_tab(name: str, value: str):
    """Raise ``ValueError`` when *value* contains a tab character.

    The error message states the reason: tabs are used as separators by
    Galaxy, so a field may not contain one.

    :param name: label of the field being checked; only used in the message.
    :param value: content of the field. Values that are not strings (for
        example integers or booleans produced by YAML parsing) are converted
        with ``str`` for the check instead of failing with a ``TypeError``.
    :raises ValueError: if the value contains a tab.
    """
    if '\t' in str(value):
        raise ValueError(
            "'{0}' is not a valid '{1}'. It may not contain a tab because "
            "these are used as seperators by galaxy .".format(
                value, name))
48
49
def prefix_plus_extension_exists(directory: Path, prefix: str, extension: str):
    """Tell whether *directory* holds an entry with the given prefix and suffix.

    :param directory: directory whose direct children are inspected.
    :param prefix: string the entry name has to start with.
    :param extension: value the entry's ``Path.suffix`` (its last suffix,
        including the leading dot) has to be equal to.
    :return: ``True`` if at least one entry matches, ``False`` otherwise
        (including when the directory is empty).
    """
    # any() stops at the first match instead of collecting every match
    # only to test whether the collection is empty.
    return any(
        entry.name.startswith(prefix) and entry.suffix == extension
        for entry in directory.iterdir())
35 58
36 def prefix_plus_extension_exists(directory, prefix, extension): 59
37 '''checks if files exist with prefix in a directory. Returns Boolean''' 60 class DataTable(object):
38 matched_files = [] 61
39 directory_files = os.listdir(directory) 62 def __init__(self,
40 for directory_file in directory_files: 63 index_path: Path,
41 if directory_file.startswith(prefix) and directory_file.endswith(extension): 64 data_table_name: str,
42 matched_files.append(directory_file) 65 indexes_properties_file: Path,
43 # Empty list should return False 66 name: str = None,
44 return bool(matched_files) 67 dbkey: str = None,
68 value: str = None,
69 extra_columns: dict = None
70 ):
71 self.index_path = index_path
72 self.data_table_name = data_table_name
73 self.name = name if name else str(self.index_path.with_suffix(
74 '').name)
75 self.value = value if value else self.name
76 self.dbkey = dbkey if dbkey else self.value
77 self.extra_columns = extra_columns if extra_columns is not None else {}
78 self.indexes_properties_file = indexes_properties_file
79
80 self.check_params()
81
82 self.index_properties = self.get_index_properties()
83
84 self.check_index_file_presence()
85
def check_params(self):
    """Check that no value destined for the data table contains a tab.

    Runs ``check_tab`` on the name, the index path, the value and the dbkey
    and then delegates to ``check_extra_columns``.

    :raises ValueError: if one of the checked values contains a tab, or if
        ``check_extra_columns`` rejects the extra columns.
    """
    check_tab('name', self.name)
    # The data table entry stores str(self.index_path), so the whole path is
    # validated rather than only its final component.
    check_tab('index_path', str(self.index_path))
    check_tab('value', self.value)
    check_tab('dbkey', self.dbkey)
    self.check_extra_columns()
93
def check_extra_columns(self):
    """Verify that the supplied extra columns match the table definition.

    The column names listed under ``extra_columns`` in the index properties
    must be exactly the keys of ``self.extra_columns``. Every supplied value
    is then checked for tabs.

    :raises ValueError: if the table defines extra columns that differ from
        the supplied ones, if columns were supplied for a table that defines
        none, or if a value contains a tab.
    """
    index_properties = self.get_index_properties()
    index_extra_columns = set(index_properties.get("extra_columns", []))
    given_extra_columns = set(self.extra_columns)
    if index_extra_columns != given_extra_columns:
        if index_extra_columns:
            # Sorted so that the message does not depend on set ordering.
            expected = ", ".join(
                repr(column) for column in sorted(index_extra_columns))
            raise ValueError(
                "Values for the following columns should be "
                "supplied: {0}.".format(expected))
        raise ValueError(
            "The table '{0}' does not have extra columns".format(
                self.data_table_name))
    for key, value in self.extra_columns.items():
        # Values parsed from YAML are not necessarily strings.
        check_tab(key, str(value))
110
def get_index_properties(self) -> dict:
    """Load and validate the properties of this table from the YAML file.

    Reads ``self.indexes_properties_file``, picks the entry stored under
    ``self.data_table_name`` and validates it with ``indexes_schema()``.

    :return: the validated properties of the table.
    :raises ValueError: if the file does not contain a mapping (for example
        because it is empty) or has no entry for the table name.
    """
    with self.indexes_properties_file.open('r') as properties_file:
        indexes = yaml.safe_load(properties_file)
    # An empty file is loaded as None and would otherwise fail on .get().
    if not isinstance(indexes, dict):
        raise ValueError(
            "'{0}' does not contain a mapping of table names to index "
            "properties".format(self.indexes_properties_file))
    index_properties = indexes.get(self.data_table_name)
    if index_properties is None:
        raise ValueError(
            "'{0}' not a supported table name".format(
                self.data_table_name))
    return indexes_schema().validate(index_properties)
120
def check_index_file_presence(self):
    """Check that the files belonging to the index exist on disk.

    Depending on ``self.index_properties`` one of three checks is done:

    * ``prefix`` is true (the default): the index path is a prefix. For
      example with BWA 'reference.fa' is the index, but the actual index
      files are 'reference.fa.amb', 'reference.fa.ann' etc. For every
      configured extension a matching file must exist next to the path.
    * ``folder`` is set: the index path is a directory that must contain
      every listed file.
    * otherwise: the index path itself must exist and must not be a
      directory.

    :raises NotImplementedError: if the properties do not define 'name'.
    :raises FileNotFoundError: if a required file or the path is missing.
    :raises IsADirectoryError: if a plain file was expected but the path is
        a directory.
    """
    index_name = self.index_properties.get('name')
    if index_name is None:
        raise NotImplementedError(
            "Property 'name' not defined for '{0}',"
            " please contact the developers to correct the mistake."
            .format(self.data_table_name))
    index_extensions = self.index_properties.get('extensions', [''])
    index_is_a_prefix = self.index_properties.get('prefix', True)
    prefix_strip_extension = self.index_properties.get(
        'prefix_strip_extension', False)
    folder_files = self.index_properties.get('folder')

    if index_is_a_prefix:
        if prefix_strip_extension:
            prefix = str(self.index_path.with_suffix("").name)
        else:
            prefix = str(self.index_path.name)
        for extension in index_extensions:
            if not prefix_plus_extension_exists(self.index_path.parent,
                                                prefix, extension):
                raise FileNotFoundError(
                    "Unable to find files with prefix '{0}' "
                    "and extension '{1}' in {2}. Is this a valid {3}?"
                    .format(
                        prefix,
                        extension,
                        str(self.index_path.parent),
                        index_name))
        return

    if folder_files is not None:
        for file in folder_files:
            if not (self.index_path / Path(file)).exists():
                raise FileNotFoundError(
                    "A file named '{0}' was not found in '{1}'".format(
                        file, str(self.index_path)))
        return

    # The index is not a prefix: the index file is the path itself.
    if not self.index_path.exists():
        raise FileNotFoundError(
            'Unable to find path {0}.'.format(self.index_path))
    if self.index_path.is_dir():
        raise IsADirectoryError(
            '{0} is a directory not a file'.format(self.index_path))
174
@property
def data_manager_dict(self) -> dict:
    """Return the data manager structure holding the single table entry.

    The result has the form ``{"data_tables": {<table name>: [<entry>]}}``
    where the entry contains ``value``, ``dbkey``, ``name``, ``path`` and
    every extra column.
    """
    row = dict(value=self.value, dbkey=self.dbkey,
               name=self.name,
               path=str(self.index_path),
               **self.extra_columns)
    return {"data_tables": {self.data_table_name: [row]}}
185
@property
def data_manager_json(self) -> str:
    """Return ``self.data_manager_dict`` serialised as a JSON string."""
    serialised = json.dumps(self.data_manager_dict)
    return serialised
189
45 190
def main():
    """Validate an index given on the command line and write its JSON.

    Parses the arguments, turns the optional ``--extra-columns`` YAML string
    into a dictionary, builds a ``DataTable`` (which performs all checks)
    using the ``indexes.yml`` file located next to this script, and writes
    the resulting data manager JSON to ``--json_output_file``.

    :raises yaml.parser.ParserError: if ``--extra-columns`` is not valid YAML.
    :raises ValueError: if ``--extra-columns`` is valid YAML but not a
        mapping.
    """
    options = argument_parser().parse_args()

    # An already existing output file is not an error: it is opened in 'w'
    # mode below and therefore overwritten.

    if options.extra_columns is None:
        extra_columns = dict()
    else:
        try:
            extra_columns = yaml.safe_load(options.extra_columns)
        except yaml.parser.ParserError as e:
            # Name the flag that actually exists and keep the original cause.
            raise yaml.parser.ParserError(
                "Invalid yaml string for --extra-columns. \nError {0}".format(
                    e)) from e
        # None (an empty string) is handled by DataTable; anything else that
        # is not a mapping cannot describe columns and their values.
        if extra_columns is not None and not isinstance(extra_columns, dict):
            raise ValueError(
                "--extra-columns should be a yaml mapping of column names "
                "to values, got: {0}".format(options.extra_columns))

    index_properties_file = Path(__file__).parent / Path("indexes.yml")
    data_table = DataTable(index_path=options.path,
                           data_table_name=options.data_table_name,
                           name=options.name,
                           value=options.value,
                           dbkey=options.dbkey,
                           indexes_properties_file=index_properties_file,
                           extra_columns=extra_columns)

    # save info to json file
    with options.json_output_file.open('w') as output_file:
        output_file.write(data_table.data_manager_json)
219
102 220
103 if __name__ == "__main__": 221 if __name__ == "__main__":
104 main() 222 main()