comparison json_data_source_mod.py @ 14:babc444d4bd0 draft

Uploaded 20170530
author fabio
date Tue, 30 May 2017 15:56:27 -0400
parents 80593f75d74a
children 3eabece82abb
--- json_data_source_mod.py    13:39c4f4528c6e
+++ json_data_source_mod.py    14:babc444d4bd0
@@ -132,19 +132,19 @@
     dataset_url, output_filename, \
         extra_files_path, file_name, \
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     extension = query_item.get( 'extension' )
-    #filename = query_item.get( 'url' )
+    url = query_item.get( 'url' )
     filename = query_item.get( 'name' )
 
     check_ext = ""
-    if ( filename.endswith( "gz" ) ):
+    if ( url.endswith( "gz" ) ):
         check_ext = "r:gz"
-    elif ( filename.endswith( "bz2" ) ):
+    elif ( url.endswith( "bz2" ) ):
         check_ext = "r:bz2"
-    elif ( filename.endswith( "tar" ) ):
+    elif ( url.endswith( "tar" ) ):
         check_ext = "r:"
     isArchive = bool( check_ext and check_ext.strip() )
 
     extra_data = query_item.get( 'extra_data', None )
     if primary:
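
The hunk above switches archive sniffing from the dataset's display name to its URL, which is what actually carries the compressed-file suffix. The check_ext values are standard tarfile read modes ("r:gz", "r:bz2", "r:"), so the flag presumably feeds a later tarfile.open call. A minimal standalone sketch of the same detection; the helper name detect_archive_mode is ours, for illustration only:

def detect_archive_mode(url):
    """Map a URL suffix to a tarfile read mode; "" means 'not an archive'."""
    if url.endswith("gz"):
        return "r:gz"    # gzip-compressed tar
    if url.endswith("bz2"):
        return "r:bz2"   # bzip2-compressed tar
    if url.endswith("tar"):
        return "r:"      # plain, uncompressed tar
    return ""

mode = detect_archive_mode("https://example.org/sample.tar.gz")
is_archive = bool(mode and mode.strip())   # mirrors the isArchive test above
if is_archive:
    print("would open with tarfile.open(path, %r)" % mode)

Note that, as in the original, the suffix test also matches a bare ".gz" file that is not a tar archive.
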
@@ -162,18 +162,22 @@
     metadata_parameter_file.write( metadata_to_json( dataset_id, query_item,
                                                      target_output_filename,
                                                      ds_type='dataset',
                                                      primary=primary) )
 
-    download_from_query( query_item, target_output_filename )
+    if isArchive is False:
+        download_from_query( query_item, target_output_filename )
+    else:
+        target_output_path = os.path.join(appdata_path, filename)
+        download_from_query( query_item, target_output_path )
     if extra_data:
         extra_files_path = ''.join( [ target_output_filename, 'files' ] )
         download_extra_data( extra_data, extra_files_path )
 
     """ the following code handles archives and decompress them in a collection """
     if ( isArchive ):
-        walk_on_archive(target_output_filename, check_ext, query_item.get( 'name' ), appdata_path)
+        walk_on_archive(target_output_path, check_ext, filename, appdata_path)
 
     return True
 
 
 def set_up_config_values(json_params):
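
walk_on_archive is defined outside the lines shown in this comparison, so its body is not visible here. A hypothetical sketch, inferred only from the call site above: it opens the downloaded archive at target_output_path with the detected check_ext mode and unpacks regular files into appdata_path so Galaxy can discover them as a collection. Every detail below is an assumption, not the repository's code:

import os
import tarfile

def walk_on_archive(target_output_path, check_ext, archive_name, appdata_path):
    # ASSUMPTION: open with the mode detected from the URL suffix
    # ("r:gz", "r:bz2", or "r:").
    with tarfile.open(target_output_path, check_ext) as tf:
        for member in tf.getmembers():
            if not member.isfile():
                continue
            # ASSUMPTION: flatten nested paths and prefix with the archive
            # name so files from different archives do not collide.
            member.name = "%s_%s" % (archive_name, os.path.basename(member.name))
            tf.extract(member, path=appdata_path)
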
@@ -190,38 +194,35 @@
             extra_files_path, file_name,
             ext, out_data_name,
             hda_id, dataset_id)
 
 
-def download_from_json_data( options, args, json_params=None, json_dataset_url=None ):
+def download_from_json_data( options, args ):
     """ Parse the returned JSON data and download files. Write metadata
     to flat JSON file.
     """
     output_base_path = options.path
     appdata_path = options.appdata
     if not os.path.exists(appdata_path):
         os.makedirs(appdata_path)
 
     # read tool job configuration file and parse parameters we need
-    if json_params is None:
-        json_params = json.loads( open( options.json_param_file, 'r' ).read() )
+    json_params = json.loads( open( options.json_param_file, 'r' ).read() )
 
     dataset_url, output_filename, \
         extra_files_path, file_name, \
         ext, out_data_name, \
         hda_id, dataset_id = set_up_config_values(json_params)
     # line separated JSON file to contain all dataset metadata
     metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
 
     # get JSON response from data source
     # TODO: make sure response is not enormous
-    if json_dataset_url is None:
-        query_params = json.loads(urllib.urlopen( dataset_url ).read())
-    else:
-        query_params = json.loads(urllib.urlopen( json_dataset_url ).read())
+    query_params = json.loads(urllib.urlopen( dataset_url ).read())
     # download and write files
-    primary = False
+    #primary = False
+    primary = True
     # query_item, hda_id, output_base_path, dataset_id
     for query_item in query_params:
         if isinstance( query_item, list ):
             # TODO: do something with the nested list as a collection
             for query_subitem in query_item:
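
For context on the metadata_parameter_file writes in the first hunk: the file is the "line separated JSON file" named in the comment above, one JSON object per dataset, which Galaxy reads back to register the tool's outputs. metadata_to_json itself sits in an unchanged part of the file, so the record below is an illustrative stand-in with guessed field names, not the actual implementation:

import json

def metadata_record(dataset_id, query_item, target_output_filename,
                    ds_type='dataset', primary=False):
    # Illustrative stand-in for metadata_to_json; field names are assumptions.
    meta = {
        'type': ds_type,
        'dataset_id': dataset_id,
        'name': query_item.get('name'),
        'ext': query_item.get('extension'),
        'filename': target_output_filename,
        'primary': primary,
    }
    return json.dumps(meta) + "\n"  # one newline-delimited record per dataset
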