changeset 28:9139892d06a2 draft

Uploaded
author charles-bernard
date Thu, 08 Dec 2016 03:43:26 -0500
parents 4f70c9afd89d
children 0c821f76e2e5
files data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py
diffstat 1 files changed, 14 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py	Sat Nov 19 04:24:46 2016 -0500
+++ b/data_manager_build_alfa_indexes/data_manager/data_manager_build_alfa_indexes.py	Thu Dec 08 03:43:26 2016 -0500
@@ -28,7 +28,6 @@
     page = urllib2.urlopen(req)
     return page.read()
 
-
 def download_file(link, local_file_name):
     req = urllib2.Request(link)
     src_file = urllib2.urlopen(req)
@@ -51,6 +50,7 @@
     return data_manager_dict
 
 def standardize_species_name(species_name):
+    # Lowercase the name, drop a trailing ')' and collapse each run of separators (space _ ) , - . ( =) into one underscore
     standard_species_name = re.sub(r'[)]$', '', species_name)
     standard_species_name = re.sub(r'[ _),-.(=]+ *', '_', standard_species_name)
     return standard_species_name.lower()
@@ -66,6 +66,11 @@
     return root
 
 def test_ensembl_species_exists(kingdom, url, species_name):
+    """
+    Test whether a species exists on the FTP server and, if so, return the species name together with its species_line.
+    If species_name matches a single string, that string is returned as the species name.
+    If species_name matches several strings, the script exits with an error message listing all the possible species names to enter for a new run.
+    """
     print("____________________________________________________________")
     print ("*** Testing whether %s is referenced in Ensembl %s" % (species_name, kingdom))
     list_species_file_name = 'species_Ensembl%s%s.txt' % (kingdom[0].upper(), kingdom[1:])
@@ -111,7 +116,10 @@
             if exact_match:
                 print("-> Referenced !\n")
                 return species_name, species_lines[i]
-        msg = 'The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\nPlease retry with one of the following species names:\n%s' % (species_name, list_species[0:])
+        msg = ("The string \'%s\' has been matched against the list of Ensembl Species but is not a complete species name.\n"
+                "Please retry with one of these following species names:\n" % species_name)
+        for s in list_species:
+            msg = ("%s- %s\n" % (msg, s))
         sys.exit(msg)
 
 def get_ensembl_collection(kingdom, species_line):
@@ -184,6 +192,10 @@
         msg = 'No json output file specified'
         sys.exit(msg)
     output_filename = options.output_filename
+
+    # Note: the output file to return is not empty initially.
+    # It already contains a dictionary holding, notably, the path to the directory
+    # where the alfa_indexes are expected to be found.
     params = from_json_string(open(output_filename).read())
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
@@ -209,8 +221,6 @@
 
     print("____________________________________________________________")
     print("*** General Info")
-    print("TMP DIR:\t%s" % tmp_dir)
-    print("TARGET DIR:\t%s" % target_directory)
     print("URL ROOT:\t%s" % url)
     print("SPECIES:\t%s" % data_table_entry['species'])
     print("VERSION:\t%s" % data_table_entry['version'])
@@ -219,10 +229,6 @@
     print("DBKEY:\t%s" % data_table_entry['dbkey'])
     print("NAME:\t%s" % data_table_entry['name'])
     print("PREFIX:\t%s" % data_table_entry['prefix'])
-    print("____________________________________________________________")
-    print("*** Intial dictionary")
-    print("%s" % params)
-
 
     shutil.copyfile(stranded_index_name, os.path.join(target_directory, stranded_index_name))
     shutil.copyfile(unstranded_index_name, os.path.join(target_directory, unstranded_index_name))