comparison json_data_source_mod.py @ 14:babc444d4bd0 draft
Uploaded 20170530

| author   | fabio                           |
| -------- | ------------------------------- |
| date     | Tue, 30 May 2017 15:56:27 -0400 |
| parents  | 80593f75d74a                    |
| children | 3eabece82abb                    |
comparison of 13:39c4f4528c6e (parent) with 14:babc444d4bd0 (this revision), as a unified diff:
```diff
@@ -132,19 +132,19 @@
     dataset_url, output_filename, \
         extra_files_path, file_name, \
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     extension = query_item.get( 'extension' )
-    #filename = query_item.get( 'url' )
+    url = query_item.get( 'url' )
     filename = query_item.get( 'name' )
 
     check_ext = ""
-    if ( filename.endswith( "gz" ) ):
+    if ( url.endswith( "gz" ) ):
         check_ext = "r:gz"
-    elif ( filename.endswith( "bz2" ) ):
+    elif ( url.endswith( "bz2" ) ):
         check_ext = "r:bz2"
-    elif ( filename.endswith( "tar" ) ):
+    elif ( url.endswith( "tar" ) ):
         check_ext = "r:"
     isArchive = bool( check_ext and check_ext.strip() )
 
     extra_data = query_item.get( 'extra_data', None )
     if primary:
```
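This first hunk switches archive detection from the dataset's display name (`filename`) to its download URL, since the URL is what actually carries the `.tar.gz` / `.tar.bz2` / `.tar` suffix. The resulting `check_ext` string doubles as a `tarfile` read mode. A minimal standalone sketch of the same logic; the helper name `detect_archive_mode` is hypothetical, not part of the tool:

```python
def detect_archive_mode(url):
    """Map a download URL's suffix to a tarfile read mode.

    Returns "" when the URL does not look like a tar archive,
    mirroring the check_ext / isArchive logic in the hunk above.
    """
    if url.endswith("gz"):
        return "r:gz"   # gzip-compressed tar
    elif url.endswith("bz2"):
        return "r:bz2"  # bzip2-compressed tar
    elif url.endswith("tar"):
        return "r:"     # plain, uncompressed tar
    return ""


check_ext = detect_archive_mode("https://example.org/data/batch.tar.gz")
is_archive = bool(check_ext and check_ext.strip())  # True for this URL
```

The next hunk uses this flag to route archive downloads to a different destination.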
```diff
@@ -162,18 +162,22 @@
     metadata_parameter_file.write( metadata_to_json( dataset_id, query_item,
                                                      target_output_filename,
                                                      ds_type='dataset',
                                                      primary=primary) )
 
-    download_from_query( query_item, target_output_filename )
+    if isArchive is False:
+        download_from_query( query_item, target_output_filename )
+    else:
+        target_output_path = os.path.join(appdata_path, filename)
+        download_from_query( query_item, target_output_path )
     if extra_data:
         extra_files_path = ''.join( [ target_output_filename, 'files' ] )
         download_extra_data( extra_data, extra_files_path )
 
     """ the following code handles archives and decompress them in a collection """
     if ( isArchive ):
-        walk_on_archive(target_output_filename, check_ext, query_item.get( 'name' ), appdata_path)
+        walk_on_archive(target_output_path, check_ext, filename, appdata_path)
 
     return True
 
 
 def set_up_config_values(json_params):
```
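The second hunk splits the download destination: regular files still land at `target_output_filename`, while archives are saved into `appdata_path` under the dataset name and handed to `walk_on_archive` together with the mode computed above. `walk_on_archive`'s body is outside this comparison, so the sketch below is only one plausible reading of its contract (unpack every member into `appdata_path` so Galaxy can pick the files up as collection elements), not the tool's actual implementation:

```python
import os
import tarfile


def walk_on_archive_sketch(archive_path, check_ext, archive_name, appdata_path):
    """Hypothetical stand-in for walk_on_archive: extract each regular
    member of the archive into appdata_path, flattening directories so
    every extracted file becomes a discoverable collection element."""
    with tarfile.open(archive_path, check_ext) as tf:
        for member in tf.getmembers():
            if member.isfile():
                # prefix with the archive's name so sibling archives
                # holding identically named files cannot collide
                member.name = archive_name + "_" + os.path.basename(member.name)
                tf.extract(member, path=appdata_path)
```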
```diff
@@ -190,38 +194,35 @@
             extra_files_path, file_name,
             ext, out_data_name,
             hda_id, dataset_id)
 
 
-def download_from_json_data( options, args, json_params=None, json_dataset_url=None ):
+def download_from_json_data( options, args ):
     """ Parse the returned JSON data and download files. Write metadata
     to flat JSON file.
     """
     output_base_path = options.path
     appdata_path = options.appdata
     if not os.path.exists(appdata_path):
         os.makedirs(appdata_path)
 
     # read tool job configuration file and parse parameters we need
-    if json_params is None:
-        json_params = json.loads( open( options.json_param_file, 'r' ).read() )
+    json_params = json.loads( open( options.json_param_file, 'r' ).read() )
 
     dataset_url, output_filename, \
         extra_files_path, file_name, \
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     # line separated JSON file to contain all dataset metadata
     metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
 
     # get JSON response from data source
     # TODO: make sure response is not enormous
-    if json_dataset_url is None:
-        query_params = json.loads(urllib.urlopen( dataset_url ).read())
-    else:
-        query_params = json.loads(urllib.urlopen( json_dataset_url ).read())
+    query_params = json.loads(urllib.urlopen( dataset_url ).read())
     # download and write files
-    primary = False
+    #primary = False
+    primary = True
     # query_item, hda_id, output_base_path, dataset_id
     for query_item in query_params:
         if isinstance( query_item, list ):
             # TODO: do something with the nested list as a collection
             for query_subitem in query_item:
```
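Net effect of the last hunk: `download_from_json_data` loses the optional `json_params` / `json_dataset_url` parameters (it now always reads the job parameter file and queries `dataset_url` directly), and `primary` is hard-coded to `True`. The metadata file it opens is line-delimited JSON, one object per dataset, which Galaxy reads back line by line. A minimal sketch of that format; the field names are illustrative, not the exact keys `metadata_to_json` emits:

```python
import json

records = [
    {"type": "dataset", "dataset_id": 42, "ext": "tabular", "name": "sample_1"},
    {"type": "dataset", "dataset_id": 43, "ext": "tabular", "name": "sample_2"},
]

# TOOL_PROVIDED_JOB_METADATA_FILE is conventionally named galaxy.json
with open("galaxy.json", "w") as metadata_parameter_file:
    for record in records:
        # one JSON object per line, no enclosing array
        metadata_parameter_file.write(json.dumps(record) + "\n")
```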