Mercurial > repos > ieguinoa > data_manager_fetch_tx2gene
comparison data_manager/data_manager_fetch_tx2gene.py @ 5:c380b7da5b65 draft default tip
Uploaded
| author | ieguinoa |
|---|---|
| date | Mon, 07 Jun 2021 16:33:53 +0000 |
| parents | d71f65b854de |
| children | |
Comparison legend: equal, deleted, inserted, replaced
| 4:bacd91d8b05a | 5:c380b7da5b65 |
|---|---|
| 25 from json import loads, dumps | 25 from json import loads, dumps |
| 26 | 26 |
| 27 | 27 |
| 28 CHUNK_SIZE = 2**20 # 1mb | 28 CHUNK_SIZE = 2**20 # 1mb |
| 29 | 29 |
| 30 DATA_TABLE_NAME = 'tx2gene' | 30 DATA_TABLE_NAME = 'tx2gene_table' |
| 31 | 31 |
| 32 def cleanup_before_exit( tmp_dir ): | 32 def cleanup_before_exit( tmp_dir ): |
| 33 if tmp_dir and os.path.exists( tmp_dir ): | 33 if tmp_dir and os.path.exists( tmp_dir ): |
| 34 shutil.rmtree( tmp_dir ) | 34 shutil.rmtree( tmp_dir ) |
| 35 | 35 |
| 43 # dbkey = params['param_dict']['dbkey_source']['dbkey'] | 43 # dbkey = params['param_dict']['dbkey_source']['dbkey'] |
| 44 #TODO: ensure sequence_id is unique and does not already appear in location file | 44 #TODO: ensure sequence_id is unique and does not already appear in location file |
| 45 sequence_id = params['param_dict']['sequence_id'] | 45 sequence_id = params['param_dict']['sequence_id'] |
| 46 if not sequence_id: | 46 if not sequence_id: |
| 47 sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead? | 47 sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead? |
| 48 | 48 |
| 49 # if params['param_dict']['dbkey_source']['dbkey_source_selector'] == 'new': | 49 # if params['param_dict']['dbkey_source']['dbkey_source_selector'] == 'new': |
| 50 # dbkey_name = params['param_dict']['dbkey_source']['dbkey_name'] | 50 # dbkey_name = params['param_dict']['dbkey_source']['dbkey_name'] |
| 51 # if not dbkey_name: | 51 # if not dbkey_name: |
| 52 # dbkey_name = dbkey | 52 # dbkey_name = dbkey |
| 53 # else: | 53 # else: |
| 54 # dbkey_name = None | 54 # dbkey_name = None |
| 55 dbkey = params['param_dict']['dbkey'] | 55 dbkey = params['param_dict']['dbkey'] |
| 56 dbkey_name = dbkey_description | 56 dbkey_name = dbkey_description |
| 57 sequence_name = params['param_dict']['sequence_name'] | 57 sequence_name = params['param_dict']['sequence_name'] |
| 58 if not sequence_name: | 58 if not sequence_name: |
| 59 sequence_name = dbkey_description | 59 sequence_name = dbkey_description |
| 60 if not sequence_name: | 60 if not sequence_name: |
| 288 parser = optparse.OptionParser() | 288 parser = optparse.OptionParser() |
| 289 parser.add_option( '-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description' ) | 289 parser.add_option( '-d', '--dbkey_description', dest='dbkey_description', action='store', type="string", default=None, help='dbkey_description' ) |
| 290 parser.add_option( '-b', '--base_dir', dest='base_dir', action='store', type='string', default=None, help='base_dir') | 290 parser.add_option( '-b', '--base_dir', dest='base_dir', action='store', type='string', default=None, help='base_dir') |
| 291 parser.add_option( '-t', '--type', dest='file_type', action='store', type='string', default=None, help='file_type') | 291 parser.add_option( '-t', '--type', dest='file_type', action='store', type='string', default=None, help='file_type') |
| 292 (options, args) = parser.parse_args() | 292 (options, args) = parser.parse_args() |
| 293 | |
| 294 filename = args[0] | 293 filename = args[0] |
| 295 #global DATA_TABLE_NAME | 294 #global DATA_TABLE_NAME |
| 296 rscript_gff_to_tx2gene=os.path.join( options.base_dir, 'get_tx2gene_table.R') | 295 rscript_gff_to_tx2gene=os.path.join( options.base_dir, 'get_tx2gene_table.R') |
| 297 | 296 |
| 298 #input_type='gff_gtf' | 297 #input_type='gff_gtf' |
| 299 #if options.file_type != 'gff_gtf': | 298 #if options.file_type != 'gff_gtf': |
| 300 # file_type='tx2gene' | 299 # file_type='tx2gene' |
| 301 | 300 |
| 302 params = loads( open( filename ).read() ) | 301 params = loads( open( filename ).read() ) |
| 303 target_directory = params[ 'output_data' ][0]['extra_files_path'] | 302 target_directory = params[ 'output_data' ][0]['extra_files_path'] |
| 304 os.mkdir( target_directory ) | 303 os.mkdir( target_directory ) |
| 305 data_manager_dict = {} | 304 data_manager_dict = {} |
| 306 | 305 |
| 307 dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) | 306 dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) |
| 308 | 307 |
| 309 if dbkey in [ None, '', '?' ]: | 308 if dbkey in [ None, '', '?' ]: |
| 310 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) | 309 raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) |
| 311 | 310 |
| 312 # Create a tmp_dir, in case a zip file needs to be uncompressed | 311 # Create a tmp_dir, in case a zip file needs to be uncompressed |
| 313 tmp_dir = tempfile.mkdtemp() | 312 tmp_dir = tempfile.mkdtemp() |
| 316 REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( rscript_gff_to_tx2gene, data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir) | 315 REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( rscript_gff_to_tx2gene, data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir) |
| 317 finally: | 316 finally: |
| 318 cleanup_before_exit(tmp_dir) | 317 cleanup_before_exit(tmp_dir) |
| 319 #save info to json file | 318 #save info to json file |
| 320 open( filename, 'wb' ).write( dumps( data_manager_dict ).encode() ) | 319 open( filename, 'wb' ).write( dumps( data_manager_dict ).encode() ) |
| 321 | 320 |
| 322 if __name__ == "__main__": | 321 if __name__ == "__main__": |
| 323 main() | 322 main() |
