gdcwebapp: diff json_data_source_mod.py @ 14:babc444d4bd0 draft
Uploaded 20170530
| | |
|---|---|
| author | fabio |
| date | Tue, 30 May 2017 15:56:27 -0400 |
| parents | 80593f75d74a |
| children | 3eabece82abb |
--- a/json_data_source_mod.py	Tue May 30 12:26:38 2017 -0400
+++ b/json_data_source_mod.py	Tue May 30 15:56:27 2017 -0400
@@ -134,15 +134,15 @@
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     extension = query_item.get( 'extension' )
-    #filename = query_item.get( 'url' )
+    url = query_item.get( 'url' )
     filename = query_item.get( 'name' )
 
     check_ext = ""
-    if ( filename.endswith( "gz" ) ):
+    if ( url.endswith( "gz" ) ):
         check_ext = "r:gz"
-    elif ( filename.endswith( "bz2" ) ):
+    elif ( url.endswith( "bz2" ) ):
         check_ext = "r:bz2"
-    elif ( filename.endswith( "tar" ) ):
+    elif ( url.endswith( "tar" ) ):
         check_ext = "r:"
     isArchive = bool( check_ext and check_ext.strip() )
 
@@ -164,14 +164,18 @@
                              ds_type='dataset',
                              primary=primary) )
 
-    download_from_query( query_item, target_output_filename )
+    if isArchive is False:
+        download_from_query( query_item, target_output_filename )
+    else:
+        target_output_path = os.path.join(appdata_path, filename)
+        download_from_query( query_item, target_output_path )
     if extra_data:
         extra_files_path = ''.join( [ target_output_filename, 'files' ] )
         download_extra_data( extra_data, extra_files_path )
 
     """ the following code handles archives and decompress them in a collection """
     if ( isArchive ):
-        walk_on_archive(target_output_filename, check_ext, query_item.get( 'name' ), appdata_path)
+        walk_on_archive(target_output_path, check_ext, filename, appdata_path)
 
     return True
 
@@ -192,7 +196,7 @@
                              hda_id, dataset_id)
 
 
-def download_from_json_data( options, args, json_params=None, json_dataset_url=None ):
+def download_from_json_data( options, args ):
     """ Parse the returned JSON data and download files. Write metadata
     to flat JSON file.
     """
@@ -202,8 +206,7 @@
         os.makedirs(appdata_path)
 
     # read tool job configuration file and parse parameters we need
-    if json_params is None:
-        json_params = json.loads( open( options.json_param_file, 'r' ).read() )
+    json_params = json.loads( open( options.json_param_file, 'r' ).read() )
 
     dataset_url, output_filename, \
         extra_files_path, file_name, \
@@ -214,12 +217,10 @@
     # get JSON response from data source
     # TODO: make sure response is not enormous
-    if json_dataset_url is None:
-        query_params = json.loads(urllib.urlopen( dataset_url ).read())
-    else:
-        query_params = json.loads(urllib.urlopen( json_dataset_url ).read())
+    query_params = json.loads(urllib.urlopen( dataset_url ).read())
 
     # download and write files
-    primary = False
+    #primary = False
+    primary = True
     # query_item, hda_id, output_base_path, dataset_id
     for query_item in query_params:
         if isinstance( query_item, list ):
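The substance of this revision is the archive path: the tarfile read mode is now derived from the download URL rather than the display name (a query item's name often carries no extension, while its URL does), archives are downloaded into appdata_path, and walk_on_archive unpacks them there. The body of walk_on_archive sits outside these hunks, so the following is only a sketch of the mechanism under that assumption: detect_archive_mode is a hypothetical helper mirroring the new check_ext logic, and the extractor is an assumed minimal implementation on top of the stdlib tarfile module.

```python
import tarfile


def detect_archive_mode(url):
    # Hypothetical helper mirroring the new check_ext logic:
    # map the URL suffix to a tarfile read mode, or "" for non-archives.
    if url.endswith("gz"):
        return "r:gz"
    elif url.endswith("bz2"):
        return "r:bz2"
    elif url.endswith("tar"):
        return "r:"
    return ""


def walk_on_archive(target_output_path, check_ext, filename, appdata_path):
    # Assumed behavior (the real body is not part of this diff): open the
    # downloaded archive with the detected mode and extract every regular
    # member into the per-job appdata directory.
    with tarfile.open(target_output_path, check_ext) as archive:
        for member in archive.getmembers():
            if member.isfile():
                archive.extract(member, path=appdata_path)


# Usage mirroring the patched control flow: an archive-looking URL yields a
# non-empty mode string, which routes the download into appdata_path.
url = "https://example.org/downloads/sample.tar.gz"  # assumed example URL
check_ext = detect_archive_mode(url)
is_archive = bool(check_ext and check_ext.strip())
print(check_ext, is_archive)  # -> r:gz True
```

Matching on the URL instead of the name also explains the walk_on_archive signature change visible above: the already-fetched filename is passed through rather than looked up a second time via query_item.get( 'name' ).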