comparison data_manager/data_manager_fetch_tx2gene.py @ 5:c380b7da5b65 draft default tip

Uploaded
author ieguinoa
date Mon, 07 Jun 2021 16:33:53 +0000
parents d71f65b854de
children
comparison
equal deleted inserted replaced
4:bacd91d8b05a 5:c380b7da5b65
25 from json import loads, dumps 25 from json import loads, dumps
26 26
27 27
28 CHUNK_SIZE = 2**20 # 1mb 28 CHUNK_SIZE = 2**20 # 1mb
29 29
30 DATA_TABLE_NAME = 'tx2gene' 30 DATA_TABLE_NAME = 'tx2gene_table'
31 31
32 def cleanup_before_exit( tmp_dir ): 32 def cleanup_before_exit( tmp_dir ):
33 if tmp_dir and os.path.exists( tmp_dir ): 33 if tmp_dir and os.path.exists( tmp_dir ):
34 shutil.rmtree( tmp_dir ) 34 shutil.rmtree( tmp_dir )
35 35
43 # dbkey = params['param_dict']['dbkey_source']['dbkey'] 43 # dbkey = params['param_dict']['dbkey_source']['dbkey']
44 #TODO: ensure sequence_id is unique and does not already appear in location file 44 #TODO: ensure sequence_id is unique and does not already appear in location file
45 sequence_id = params['param_dict']['sequence_id'] 45 sequence_id = params['param_dict']['sequence_id']
46 if not sequence_id: 46 if not sequence_id:
47 sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead? 47 sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead?
48 48
49 # if params['param_dict']['dbkey_source']['dbkey_source_selector'] == 'new': 49 # if params['param_dict']['dbkey_source']['dbkey_source_selector'] == 'new':
50 # dbkey_name = params['param_dict']['dbkey_source']['dbkey_name'] 50 # dbkey_name = params['param_dict']['dbkey_source']['dbkey_name']
51 # if not dbkey_name: 51 # if not dbkey_name:
52 # dbkey_name = dbkey 52 # dbkey_name = dbkey
53 # else: 53 # else:
54 # dbkey_name = None 54 # dbkey_name = None
55 dbkey = params['param_dict']['dbkey'] 55 dbkey = params['param_dict']['dbkey']
56 dbkey_name = dbkey_description 56 dbkey_name = dbkey_description
57 sequence_name = params['param_dict']['sequence_name'] 57 sequence_name = params['param_dict']['sequence_name']
58 if not sequence_name: 58 if not sequence_name:
59 sequence_name = dbkey_description 59 sequence_name = dbkey_description
60 if not sequence_name: 60 if not sequence_name:
288 parser = optparse.OptionParser() 288 parser = optparse.OptionParser()
289 parser.add_option( '-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description' ) 289 parser.add_option( '-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description' )
290 parser.add_option( '-b', '--base_dir', dest='base_dir', action='store', type='string', default=None, help='base_dir') 290 parser.add_option( '-b', '--base_dir', dest='base_dir', action='store', type='string', default=None, help='base_dir')
291 parser.add_option( '-t', '--type', dest='file_type', action='store', type='string', default=None, help='file_type') 291 parser.add_option( '-t', '--type', dest='file_type', action='store', type='string', default=None, help='file_type')
292 (options, args) = parser.parse_args() 292 (options, args) = parser.parse_args()
293
294 filename = args[0] 293 filename = args[0]
295 #global DATA_TABLE_NAME 294 #global DATA_TABLE_NAME
296 rscript_gff_to_tx2gene=os.path.join( options.base_dir, 'get_tx2gene_table.R') 295 rscript_gff_to_tx2gene=os.path.join( options.base_dir, 'get_tx2gene_table.R')
297 296
298 #input_type='gff_gtf' 297 #input_type='gff_gtf'
299 #if options.file_type != 'gff_gtf': 298 #if options.file_type != 'gff_gtf':
300 # file_type='tx2gene' 299 # file_type='tx2gene'
301 300
302 params = loads( open( filename ).read() ) 301 params = loads( open( filename ).read() )
303 target_directory = params[ 'output_data' ][0]['extra_files_path'] 302 target_directory = params[ 'output_data' ][0]['extra_files_path']
304 os.mkdir( target_directory ) 303 os.mkdir( target_directory )
305 data_manager_dict = {} 304 data_manager_dict = {}
306 305
307 dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) 306 dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description )
308 307
309 if dbkey in [ None, '', '?' ]: 308 if dbkey in [ None, '', '?' ]:
310 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) 309 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) )
311 310
312 # Create a tmp_dir, in case a zip file needs to be uncompressed 311 # Create a tmp_dir, in case a zip file needs to be uncompressed
313 tmp_dir = tempfile.mkdtemp() 312 tmp_dir = tempfile.mkdtemp()
316 REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( rscript_gff_to_tx2gene, data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir) 315 REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( rscript_gff_to_tx2gene, data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir)
317 finally: 316 finally:
318 cleanup_before_exit(tmp_dir) 317 cleanup_before_exit(tmp_dir)
319 #save info to json file 318 #save info to json file
320 open( filename, 'wb' ).write( dumps( data_manager_dict ).encode() ) 319 open( filename, 'wb' ).write( dumps( data_manager_dict ).encode() )
321 320
322 if __name__ == "__main__": 321 if __name__ == "__main__":
323 main() 322 main()