Mercurial > repos > fabio > gdcwebapp
comparison json_data_source_mod.py @ 14:babc444d4bd0 draft
Uploaded 2017-05-30
| author | fabio |
|---|---|
| date | Tue, 30 May 2017 15:56:27 -0400 |
| parents | 80593f75d74a |
| children | 3eabece82abb |
Comparison legend: equal | deleted | inserted | replaced
| 13:39c4f4528c6e | 14:babc444d4bd0 |
|---|---|
| 132 dataset_url, output_filename, \ | 132 dataset_url, output_filename, \ |
| 133 extra_files_path, file_name, \ | 133 extra_files_path, file_name, \ |
| 134 ext, out_data_name, \ | 134 ext, out_data_name, \ |
| 135 hda_id, dataset_id = set_up_config_values(json_params) | 135 hda_id, dataset_id = set_up_config_values(json_params) |
| 136 extension = query_item.get( 'extension' ) | 136 extension = query_item.get( 'extension' ) |
| 137 #filename = query_item.get( 'url' ) | 137 url = query_item.get( 'url' ) |
| 138 filename = query_item.get( 'name' ) | 138 filename = query_item.get( 'name' ) |
| 139 | 139 |
| 140 check_ext = "" | 140 check_ext = "" |
| 141 if ( filename.endswith( "gz" ) ): | 141 if ( url.endswith( "gz" ) ): |
| 142 check_ext = "r:gz" | 142 check_ext = "r:gz" |
| 143 elif ( filename.endswith( "bz2" ) ): | 143 elif ( url.endswith( "bz2" ) ): |
| 144 check_ext = "r:bz2" | 144 check_ext = "r:bz2" |
| 145 elif ( filename.endswith( "tar" ) ): | 145 elif ( url.endswith( "tar" ) ): |
| 146 check_ext = "r:" | 146 check_ext = "r:" |
| 147 isArchive = bool( check_ext and check_ext.strip() ) | 147 isArchive = bool( check_ext and check_ext.strip() ) |
| 148 | 148 |
| 149 extra_data = query_item.get( 'extra_data', None ) | 149 extra_data = query_item.get( 'extra_data', None ) |
| 150 if primary: | 150 if primary: |
| 162 metadata_parameter_file.write( metadata_to_json( dataset_id, query_item, | 162 metadata_parameter_file.write( metadata_to_json( dataset_id, query_item, |
| 163 target_output_filename, | 163 target_output_filename, |
| 164 ds_type='dataset', | 164 ds_type='dataset', |
| 165 primary=primary) ) | 165 primary=primary) ) |
| 166 | 166 |
| 167 download_from_query( query_item, target_output_filename ) | 167 if isArchive is False: |
| 168 download_from_query( query_item, target_output_filename ) | |
| 169 else: | |
| 170 target_output_path = os.path.join(appdata_path, filename) | |
| 171 download_from_query( query_item, target_output_path ) | |
| 168 if extra_data: | 172 if extra_data: |
| 169 extra_files_path = ''.join( [ target_output_filename, 'files' ] ) | 173 extra_files_path = ''.join( [ target_output_filename, 'files' ] ) |
| 170 download_extra_data( extra_data, extra_files_path ) | 174 download_extra_data( extra_data, extra_files_path ) |
| 171 | 175 |
| 172 """ the following code handles archives and decompress them in a collection """ | 176 """ the following code handles archives and decompress them in a collection """ |
| 173 if ( isArchive ): | 177 if ( isArchive ): |
| 174 walk_on_archive(target_output_filename, check_ext, query_item.get( 'name' ), appdata_path) | 178 walk_on_archive(target_output_path, check_ext, filename, appdata_path) |
| 175 | 179 |
| 176 return True | 180 return True |
| 177 | 181 |
| 178 | 182 |
| 179 def set_up_config_values(json_params): | 183 def set_up_config_values(json_params): |
| 190 extra_files_path, file_name, | 194 extra_files_path, file_name, |
| 191 ext, out_data_name, | 195 ext, out_data_name, |
| 192 hda_id, dataset_id) | 196 hda_id, dataset_id) |
| 193 | 197 |
| 194 | 198 |
| 195 def download_from_json_data( options, args, json_params=None, json_dataset_url=None ): | 199 def download_from_json_data( options, args ): |
| 196 """ Parse the returned JSON data and download files. Write metadata | 200 """ Parse the returned JSON data and download files. Write metadata |
| 197 to flat JSON file. | 201 to flat JSON file. |
| 198 """ | 202 """ |
| 199 output_base_path = options.path | 203 output_base_path = options.path |
| 200 appdata_path = options.appdata | 204 appdata_path = options.appdata |
| 201 if not os.path.exists(appdata_path): | 205 if not os.path.exists(appdata_path): |
| 202 os.makedirs(appdata_path) | 206 os.makedirs(appdata_path) |
| 203 | 207 |
| 204 # read tool job configuration file and parse parameters we need | 208 # read tool job configuration file and parse parameters we need |
| 205 if json_params is None: | 209 json_params = json.loads( open( options.json_param_file, 'r' ).read() ) |
| 206 json_params = json.loads( open( options.json_param_file, 'r' ).read() ) | |
| 207 | 210 |
| 208 dataset_url, output_filename, \ | 211 dataset_url, output_filename, \ |
| 209 extra_files_path, file_name, \ | 212 extra_files_path, file_name, \ |
| 210 ext, out_data_name, \ | 213 ext, out_data_name, \ |
| 211 hda_id, dataset_id = set_up_config_values(json_params) | 214 hda_id, dataset_id = set_up_config_values(json_params) |
| 212 # line separated JSON file to contain all dataset metadata | 215 # line separated JSON file to contain all dataset metadata |
| 213 metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' ) | 216 metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' ) |
| 214 | 217 |
| 215 # get JSON response from data source | 218 # get JSON response from data source |
| 216 # TODO: make sure response is not enormous | 219 # TODO: make sure response is not enormous |
| 217 if json_dataset_url is None: | 220 query_params = json.loads(urllib.urlopen( dataset_url ).read()) |
| 218 query_params = json.loads(urllib.urlopen( dataset_url ).read()) | |
| 219 else: | |
| 220 query_params = json.loads(urllib.urlopen( json_dataset_url ).read()) | |
| 221 # download and write files | 221 # download and write files |
| 222 primary = False | 222 #primary = False |
| 223 primary = True | |
| 223 # query_item, hda_id, output_base_path, dataset_id | 224 # query_item, hda_id, output_base_path, dataset_id |
| 224 for query_item in query_params: | 225 for query_item in query_params: |
| 225 if isinstance( query_item, list ): | 226 if isinstance( query_item, list ): |
| 226 # TODO: do something with the nested list as a collection | 227 # TODO: do something with the nested list as a collection |
| 227 for query_subitem in query_item: | 228 for query_subitem in query_item: |
