Mercurial > repos > charles-bernard > data_manager_build_alfa_indexes

--- a/data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py	Mon Oct 31 09:18:34 2016 -0400
+++ b/data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py	Mon Oct 31 18:40:11 2016 -0400
@@ -12,7 +12,6 @@
 from optparse import OptionParser
 from galaxy.util.json import from_json_string, to_json_string

-
 def get_arg():
     parser = OptionParser()
     parser.add_option("-e", "--ensembl", dest = 'ensembl_info', action = "store", nargs = 2, metavar = ("kingdom", "species_name"), type = "str")
@@ -38,7 +37,6 @@
     local_file.write(src_file.read())
     local_file.close()

-
 def uncompress_gz(gz_file_name, uncompressed_file_name):
     print("____________________________________________________________")
     print("*** Uncompressing %s" % gz_file_name)
@@ -48,19 +46,16 @@
     uncompressed_file.close()
     print("-> Uncompressed !\n")

-
 def add_data_table_entry( data_manager_dict, data_table_entry ):
     data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
     data_manager_dict['data_tables']['alfa_indexes'] = data_manager_dict['data_tables'].get( 'alfa_indexes', data_table_entry )
     return data_manager_dict

-
 def standardize_species_name(species_name):
     standard_species_name = re.sub(r'[)]$', '', species_name)
     standard_species_name = re.sub(r'[ _),-.(=]+ *', '_', standard_species_name)
     return standard_species_name.lower()

-
 def get_ensembl_url_root(kingdom):
     print("____________________________________________________________")
     print("*** Determining Ensembl ftp root url")
@@ -71,11 +66,11 @@
     print("-> Determined !\n")
     return root

-
 def test_ensembl_species_exists(kingdom, url, species_name):
     print("____________________________________________________________")
     print ("*** Testing whether %s is referenced in Ensembl %s" % (species_name, kingdom))
     list_species_file_name = 'species_Ensembl%s%s.txt' % (kingdom[0].upper(), kingdom[1:])
+    print("%s" % kingdom)
     if kingdom=='vertebrates':
         download_file(url, list_species_file_name)
     else:
@@ -93,8 +88,13 @@
     nb_lines = len(species_lines)

     if nb_lines == 1:
-        columns = species_lines[0].split('\t')
-        found_species_name = columns[1]
+        if kingdom == 'vertebrates':
+            fields = species_lines[0].split(' ')
+            columns = fields[-1].split('\r')
+            found_species_name = columns[0]
+        else:
+            columns = species_lines[0].split('\t')
+            found_species_name = columns[1]
         if species_name != found_species_name:
             print('-> \'%s\' has been replace with the complete species name \'%s\'' % (species_name, found_species_name))
             return found_species_name, species_lines_matched
@@ -103,8 +103,13 @@
     else:
         list_species = [''] * nb_lines
         for i in range(0, nb_lines):
-            columns = species_lines[i].split('\t')
-            list_species[i] = columns[1]
+            if kingdom == 'vertebrates':
+                fields = species_lines[i].split(' ')
+                columns = fields[-1].split('\r')
+                list_species[i] = columns[0]
+            else:
+                columns = species_lines[i].split('\t')
+                list_species[i] = columns[1]
             exact_match = re.search('^%s$' % species_name, list_species[i])
             if exact_match:
                 print("-> Referenced !\n")
@@ -113,7 +118,6 @@
         logging.critical(msg)
         sys.exit(msg)

-
 def get_ensembl_collection(kingdom, species_line):
     print("*** Extracting the %s_collection of the species" % kingdom)
     collection_regex = re.compile('%s_.+_collection' % kingdom.lower())
@@ -124,7 +128,6 @@
     print("-> Extracted !\n")
     return collection_match.group(0)

-
 def get_ensembl_gtf_archive_name(url_dir, species_name):
     print("____________________________________________________________")
     print("*** Extracting the gtf archive name of %s" % species_name)
@@ -137,7 +140,6 @@
     print("-> Extracted !\n")
     return gtf_archive_name

-
 def get_ensembl_gtf_archive(kingdom, url, species_name, species_line):
     if kingdom != 'vertebrates':
         url = url + 'gtf/'
@@ -153,7 +155,6 @@
     print("-> Downloaded !\n")
     return gtf_archive_name

-
 def generate_alfa_indexes(path_to_alfa, gtf_file_name):
     print("____________________________________________________________")
     print("*** Generating alfa indexes from %s" % gtf_file_name)
@@ -166,7 +167,6 @@
     print("Alfa prompt:\n%s" % alfa_out)
     print("-> Generated !\n")

-
 def get_data_table_new_entry(gtf_archive_name):
     info_list = gtf_archive_name.split('.')
     species = info_list[0]
@@ -179,14 +179,11 @@
     entry_dict = { 'species': species, 'version': version, 'release': release, 'value': value, 'dbkey': dbkey, 'name': name, 'prefix': prefix }
     return entry_dict

-
 def main():
     options, args = get_arg()
-    galaxy_root_dir = args[0]
-    tool_dir = args[1]
+    tool_dir = args[0]

     path_to_alfa = os.path.join(tool_dir, 'ALFA.py')
-    path_to_tmp_dir = os.path.join(galaxy_root_dir, 'database/tmp/')

     if options.output_filename == None:
         msg = 'No json output file specified'
@@ -197,10 +194,9 @@
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)

-    tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='', dir=path_to_tmp_dir)
+    tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='')
     os.chdir(tmp_dir)
-    #log_file_name = 'galaxy_log_report.log'
-    #logging.basicConfig(level=print, filename=log_file_name, filemode="a+", format='%(message)s')
+
     data_manager_dict = {}

     if options.ensembl_info:
@@ -236,8 +232,6 @@

     shutil.copyfile(stranded_index_name, os.path.join(target_directory, stranded_index_name))
     shutil.copyfile(unstranded_index_name, os.path.join(target_directory, unstranded_index_name))
-    #shutil.copyfile(log_file_name, os.path.join(target_directory, log_file_name))
-    #shutil.copyfile(log_file_name, options.log_filename)

     cleanup_before_exit(tmp_dir)