| 1 | 1 #!/usr/bin/python | 
|  | 2 import os | 
|  | 3 import optparse | 
|  | 4 import sys | 
|  | 5 import time | 
|  | 6 import re | 
|  | 7 | 
|  | 8 import vdb_common | 
|  | 9 import vdb_retrieval | 
|  | 10 | 
class MyParser(optparse.OptionParser):
    """
    OptionParser variant that emits its epilog verbatim.

    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    Provides a better display of formatted help info in the epilog portion
    of optparse: the text is returned as written instead of being handed to
    the help formatter.
    """

    def format_epilog(self, formatter):
        """
        Return the epilog text unmodified, or '' when no epilog was set.

        formatter is accepted only for signature compatibility with
        optparse.OptionParser.format_epilog; it is not used.
        """
        # format_help() joins this value with the other help sections, so an
        # unset epilog (None) must become an empty string, not None.
        return self.epilog or ''
|  | 18 | 
|  | 19 | 
def stop_err(msg):
    """
    Write msg followed by a newline to stderr, then exit with status 1.

    msg may be any object; it is rendered with %s formatting.
    """
    # Wrap msg in a 1-tuple so that a tuple argument is rendered as text
    # instead of being unpacked as several format arguments.
    sys.stderr.write("%s\n" % (msg,))
    sys.exit(1)
|  | 23 | 
|  | 24 | 
class ReportEngine(object):
    """
    Command-line driver for versioned data retrieval.

    Parses the tool's options, then hands each requested retrieval to a
    vdb_retrieval.VDBRetrieval object, which looks up the data store,
    fetches the requested version and updates the history.
    """

    def __init__(self):
        pass

    def __main__(self):
        """
        Process every requested retrieval, then create the output file.

        The --retrievals option is a '|'-separated list; each entry is
        handled in order. Any failure prints an error message and exits
        with status 1.
        """
        options, _args = self.get_command_line()

        # Without this guard a missing option surfaces as an AttributeError
        # on None.strip() further down.
        if options.retrievals is None:
            self._fail('Error: no retrievals were requested (see --retrievals).')

        retrieval_obj = vdb_retrieval.VDBRetrieval()
        retrieval_obj.set_api(options.api_info_path)

        for retrieval in options.retrievals.strip().strip('|').split('|'):
            self._process_retrieval(retrieval_obj, retrieval, options.globalRetrievalDate)

        # Output file needs to exist.  Otherwise Galaxy doesn't generate a placeholder file name for the output, and so we can't do things like check for [placeholder name]_files folder.  Add something to report on?
        # Nothing is reported yet, so the file is only created (or truncated).
        with open(options.output, 'w'):
            pass

    @staticmethod
    def _fail(message):
        """Print message to stdout and exit with status 1."""
        print(message)
        sys.exit(1)

    def _select_version_id(self, ds_obj, params, global_retrieval_date):
        """
        Choose the version id for one retrieval.

        An explicit version in params[1] wins; otherwise the version is
        looked up from global_retrieval_date when one was supplied;
        otherwise '' is returned.
        """
        if len(params) > 1:
            version_list = params[1].strip()
            if version_list:
                # VersionList SHOULD just have 1 id
                return version_list.split()[0]

        # User didn't select version_id via "Add new retrieval"
        if global_retrieval_date:
            retrieval_date = vdb_common.parse_date(global_retrieval_date)
            return ds_obj.get_version_options(global_retrieval_date=retrieval_date, selection=True)

        return ''

    def _process_retrieval(self, retrieval_obj, retrieval, global_retrieval_date):
        """
        Retrieve one data store version and optionally run workflows on it.

        retrieval is one entry of the --retrievals option.
        """
        # Normally xml form supplies "spec_file_id, [version list], [workflow_list]"
        params = retrieval.strip().split(',')
        spec_file_id = params[0]

        if spec_file_id == 'none':
            self._fail('Error: Form was selected without requesting a data store to retrieve!')

        # STEP 1:  Determine data store type and location
        data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
        data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])
        base_folder_id = data_store_spec['folder_id']

        if not data_store_type:
            # data_store_type is falsy here (possibly None), so report the
            # name that was tested rather than concatenating the type itself.
            self._fail('Error: unrecognized data store type [%s]' % (data_store_spec['name'],))

        ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)
        version_id = self._select_version_id(ds_obj, params, global_retrieval_date)

        # Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
        ds_obj.get_version(version_id)
        if ds_obj.version_path is None:
            self._fail("Error: unable to retrieve version [%s] from %s archive [%s].  Archive doesn't contain this version id?" % (version_id, data_store_type, ds_obj.library_version_path))

        # Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
        library_dataset_ids = retrieval_obj.get_library_version_datasets(ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path)

        # The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
        # In that case try lookup directly.
        if not library_dataset_ids and data_store_type == 'folder':
            library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
            library_dataset_ids = [item['id'] for item in library_version_datasets]

        if not library_dataset_ids:
            self._fail('Error: unable to retrieve version [%s] from %s archive [%s] ' % (version_id, data_store_type, ds_obj.library_version_path))

        # At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
        versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)

        if len(params) > 2:
            workflow_list = params[2].strip()
            if workflow_list:
                # We have workflow run via admin_api and admin_api history.
                retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)

    def get_command_line(self):
        """
        Parse the command line and return optparse's (options, args) pair.

        The -v/--version flag is declared here but is not read anywhere in
        this class.
        """
        parser = MyParser(
            description='This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
            usage='python versioned_data.py [options]',
            # Spelled with escapes so the help text keeps its original
            # whitespace regardless of how this file is indented.
            epilog=(
                'Details:\n'
                '\n'
                '\t\t\tThis tool retrieves links to current or past versions of fasta (or other key-value text) databases from a cache kept in the data library called "Fasta Databases". It then places them into the current history so that subsequent tools can work with that data.\n'
                '\t\t'
            ),
        )

        parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
            help='List of datasources and their versions and galaxy workflows to return')

        parser.add_option('-o', '--output', type='string', dest='output',
            help='Path of output log file to create')

        parser.add_option('-O', '--output_id', type='string', dest='output_id',
            help='Output identifier')

        parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate',
            help='Provide date/time for data recall.  Defaults to now.')

        parser.add_option('-v', '--version', dest='version', default=False, action='store_true',
            help='Version number of this program.')

        parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path',
            help='Galaxy user api key/path.')

        return parser.parse_args()
|  | 138 | 
|  | 139 | 
|  | 140 | 
if __name__ == '__main__':

    # Time the whole run so the elapsed seconds can be reported at the end.
    started_at = time.time()

    engine = ReportEngine()
    engine.__main__()

    elapsed_seconds = int(time.time() - started_at)
    print('Execution time (seconds): %d' % elapsed_seconds)
|  | 149 |