comparison json_data_source_mod.py @ 14:babc444d4bd0 draft

Uploaded 20170530
author fabio
date Tue, 30 May 2017 15:56:27 -0400
parents 80593f75d74a
children 3eabece82abb
--- json_data_source_mod.py    13:39c4f4528c6e
+++ json_data_source_mod.py    14:babc444d4bd0
@@ -132,19 +132,19 @@
     dataset_url, output_filename, \
         extra_files_path, file_name, \
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     extension = query_item.get( 'extension' )
-    #filename = query_item.get( 'url' )
+    url = query_item.get( 'url' )
     filename = query_item.get( 'name' )
 
     check_ext = ""
-    if ( filename.endswith( "gz" ) ):
+    if ( url.endswith( "gz" ) ):
         check_ext = "r:gz"
-    elif ( filename.endswith( "bz2" ) ):
+    elif ( url.endswith( "bz2" ) ):
         check_ext = "r:bz2"
-    elif ( filename.endswith( "tar" ) ):
+    elif ( url.endswith( "tar" ) ):
         check_ext = "r:"
     isArchive = bool( check_ext and check_ext.strip() )
 
     extra_data = query_item.get( 'extra_data', None )
     if primary:
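
The hunk above switches archive sniffing from the dataset's display name to its URL, which is what actually carries the compressed-file suffix. The check_ext values are standard tarfile read modes ("r:gz", "r:bz2", "r:"), so the flag presumably feeds a later tarfile.open call. A minimal standalone sketch of the same detection; the helper name detect_archive_mode is ours, for illustration only:

def detect_archive_mode(url):
    """Map a URL suffix to a tarfile read mode; "" means 'not an archive'."""
    if url.endswith("gz"):
        return "r:gz"    # gzip-compressed tar
    if url.endswith("bz2"):
        return "r:bz2"   # bzip2-compressed tar
    if url.endswith("tar"):
        return "r:"      # plain, uncompressed tar
    return ""

mode = detect_archive_mode("https://example.org/sample.tar.gz")
is_archive = bool(mode and mode.strip())   # mirrors the isArchive test above
if is_archive:
    print("would open with tarfile.open(path, %r)" % mode)

Note that, as in the original, the suffix test also matches a bare ".gz" file that is not a tar archive.
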
@@ -162,18 +162,22 @@
     metadata_parameter_file.write( metadata_to_json( dataset_id, query_item,
                                                      target_output_filename,
                                                      ds_type='dataset',
                                                      primary=primary) )
 
-    download_from_query( query_item, target_output_filename )
+    if isArchive is False:
+        download_from_query( query_item, target_output_filename )
+    else:
+        target_output_path = os.path.join(appdata_path, filename)
+        download_from_query( query_item, target_output_path )
     if extra_data:
         extra_files_path = ''.join( [ target_output_filename, 'files' ] )
         download_extra_data( extra_data, extra_files_path )
 
     """ the following code handles archives and decompress them in a collection """
     if ( isArchive ):
-        walk_on_archive(target_output_filename, check_ext, query_item.get( 'name' ), appdata_path)
+        walk_on_archive(target_output_path, check_ext, filename, appdata_path)
 
     return True
 
 
 def set_up_config_values(json_params):
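
walk_on_archive is defined outside the lines shown in this comparison, so its body is not visible here. A hypothetical sketch, inferred only from the call site above: it opens the downloaded archive at target_output_path with the detected check_ext mode and unpacks regular files into appdata_path so Galaxy can discover them as a collection. Every detail below is an assumption, not the repository's code:

import os
import tarfile

def walk_on_archive(target_output_path, check_ext, archive_name, appdata_path):
    # ASSUMPTION: open with the mode detected from the URL suffix
    # ("r:gz", "r:bz2", or "r:").
    with tarfile.open(target_output_path, check_ext) as tf:
        for member in tf.getmembers():
            if not member.isfile():
                continue
            # ASSUMPTION: flatten nested paths and prefix with the archive
            # name so files from different archives do not collide.
            member.name = "%s_%s" % (archive_name, os.path.basename(member.name))
            tf.extract(member, path=appdata_path)
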
@@ -190,38 +194,35 @@
             extra_files_path, file_name,
             ext, out_data_name,
             hda_id, dataset_id)
 
 
-def download_from_json_data( options, args, json_params=None, json_dataset_url=None ):
+def download_from_json_data( options, args ):
     """ Parse the returned JSON data and download files. Write metadata
     to flat JSON file.
     """
     output_base_path = options.path
     appdata_path = options.appdata
     if not os.path.exists(appdata_path):
         os.makedirs(appdata_path)
 
     # read tool job configuration file and parse parameters we need
-    if json_params is None:
-        json_params = json.loads( open( options.json_param_file, 'r' ).read() )
+    json_params = json.loads( open( options.json_param_file, 'r' ).read() )
 
     dataset_url, output_filename, \
         extra_files_path, file_name, \
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     # line separated JSON file to contain all dataset metadata
     metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
 
     # get JSON response from data source
     # TODO: make sure response is not enormous
-    if json_dataset_url is None:
-        query_params = json.loads(urllib.urlopen( dataset_url ).read())
-    else:
-        query_params = json.loads(urllib.urlopen( json_dataset_url ).read())
+    query_params = json.loads(urllib.urlopen( dataset_url ).read())
     # download and write files
-    primary = False
+    #primary = False
+    primary = True
     # query_item, hda_id, output_base_path, dataset_id
     for query_item in query_params:
         if isinstance( query_item, list ):
             # TODO: do something with the nested list as a collection
             for query_subitem in query_item:
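
For context on the metadata_parameter_file writes in the first hunk: the file is the "line separated JSON file" named in the comment above, one JSON object per dataset, which Galaxy reads back to register the tool's outputs. metadata_to_json itself sits in an unchanged part of the file, so the record below is an illustrative stand-in with guessed field names, not the actual implementation:

import json

def metadata_record(dataset_id, query_item, target_output_filename,
                    ds_type='dataset', primary=False):
    # Illustrative stand-in for metadata_to_json; field names are assumptions.
    meta = {
        'type': ds_type,
        'dataset_id': dataset_id,
        'name': query_item.get('name'),
        'ext': query_item.get('extension'),
        'filename': target_output_filename,
        'primary': primary,
    }
    return json.dumps(meta) + "\n"  # one newline-delimited record per dataset
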