Mercurial > repos > rmarenco > hubarchivecreator

--- a/Bam.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/Bam.py	Thu Jul 21 05:58:51 2016 -0400
@@ -15,12 +15,8 @@


 class Bam( Datatype ):
-    def __init__( self, input_bam_false_path, data_bam ,
-                 inputFastaFile, extra_files_path, tool_directory ):
-        super(Bam, self).__init__( input_fasta_file=inputFastaFile,
-                                   extra_files_path=extra_files_path,
-                                   tool_directory=tool_directory,
-                                   )
+    def __init__(self, input_bam_false_path, data_bam):
+        super(Bam, self).__init__()

         self.track = None
Binary file Bam.pyc has changed
--- a/Bed.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/Bed.py	Thu Jul 21 05:58:51 2016 -0400
@@ -11,19 +11,14 @@


 class Bed( Datatype ):
-    def __init__( self, inputBedGeneric, data_bed_generic,
-                 inputFastaFile, extra_files_path, tool_directory ):
-        super(Bed, self).__init__(
-            inputFastaFile, extra_files_path, tool_directory
-        )
+    def __init__( self, inputBedGeneric, data_bed_generic):
+        super(Bed, self).__init__()

         self.track = None

         self.inputBedGeneric = inputBedGeneric

         self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
-        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
-        self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)

         self.data_bed_generic = data_bed_generic
         self.name_bed_generic = self.data_bed_generic["name"]
@@ -32,15 +27,6 @@
         # Sort processing
         subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)

-        # Generate the chrom.sizes
-        # TODO: Isolate in a function
-        # We first get the twoBit Infos
-        subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name)
-
-        # Then we get the output to inject into the sort
-        # TODO: Check if no errors
-        subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name)
-
         # bedToBigBed processing
         # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
         trackName = "".join( ( self.name_bed_generic, ".bb") )
Binary file Bed.pyc has changed
--- a/BedSimpleRepeats.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/BedSimpleRepeats.py	Thu Jul 21 05:58:51 2016 -0400
@@ -10,40 +10,26 @@


 class BedSimpleRepeats( Datatype ):
-    def __init__( self, input_bed_simple_repeats_false_path, data_bed_simple_repeats,
-                 input_fasta_file, extra_files_path, tool_directory ):
+    def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats):

-        super(BedSimpleRepeats, self).__init__(
-                input_fasta_file, extra_files_path, tool_directory
-        )
+        super(BedSimpleRepeats, self).__init__()

         self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path
         self.name_bed_simple_repeats = data_bed_simple_repeats["name"]
         self.priority = data_bed_simple_repeats["order_index"]

         sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
-        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
-        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")

         # Sort processing
         subtools.sort(self.input_bed_simple_repeats_false_path, sortedBedFile.name)

-        # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf
-        # Generate the chrom.sizes
-
-        subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
-
-        # Then we get the output to inject into the sort
-        # TODO: Check if no errors
-        subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
-
         # bedToBigBed processing
         # TODO: Change the name of the bb, to tool + genome + .bb
         trackName = "".join( ( self.name_bed_simple_repeats, '.bb' ) )
         myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
         auto_sql_option = "%s%s" % ('-as=', os.path.join(self.tool_directory, 'trf_simpleRepeat.as'))
         with open(myBigBedFilePath, 'w') as bigBedFile:
-            subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name,
+            subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name,
                                  typeOption='-type=bed4+12',
                                  autoSql=auto_sql_option)
Binary file BedSimpleRepeats.pyc has changed
--- a/BigWig.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/BigWig.py	Thu Jul 21 05:58:51 2016 -0400
@@ -10,11 +10,8 @@


 class BigWig( Datatype ):
-    def __init__(self, input_bigwig_path, data_bigwig,
-                 input_fasta_path, extra_files_path, tool_directory):
-        super(BigWig, self).__init__(
-                input_fasta_path, extra_files_path, tool_directory
-        )
+    def __init__(self, input_bigwig_path, data_bigwig):
+        super(BigWig, self).__init__()

         self.track = None
Binary file BigWig.pyc has changed
--- a/Datatype.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/Datatype.py	Thu Jul 21 05:58:51 2016 -0400
@@ -6,6 +6,7 @@
 """

 import os
+import tempfile

 from util import subtools

@@ -14,32 +15,54 @@

     twoBitFile = None

-    def __init__( self, input_fasta_file, extra_files_path, tool_directory ):
+    input_fasta_file = None
+    extra_files_path = None
+    tool_directory = None

-        self.input_fasta_file = input_fasta_file
-        self.extra_files_path = extra_files_path
-        self.tool_directory = tool_directory
+    mySpecieFolderPath = None
+    myTrackFolderPath = None
+
+    twoBitFile = None
+    chromSizesFile = None

-        self.twoBitFile = None
+    def __init__(self):
+        """Check that the class-level configuration has been set.
+
+        Raises:
+            TypeError: if Datatype.input_fasta_file, Datatype.extra_files_path
+                or Datatype.tool_directory is still None (pre_init is the
+                method that assigns them).
+        """
 
-        # Construction of the arborescence
-        # TODO: Change the hard-coded path with a input based one
-        self.mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")
+        # The trailing space keeps the two sentences apart once the adjacent
+        # literals are concatenated.
+        not_init_message = "The {0} is not initialized. " \
+                           "Did you use pre_init static method first?"
+        if Datatype.input_fasta_file is None:
+            raise TypeError(not_init_message.format('reference genome'))
+        if Datatype.extra_files_path is None:
+            raise TypeError(not_init_message.format('track Hub path'))
+        if Datatype.tool_directory is None:
+            raise TypeError(not_init_message.format('tool directory'))
+

-        # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object
-        self.myTrackFolderPath = os.path.join(self.mySpecieFolderPath, "tracks")
+    @staticmethod
+    def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
+                 extra_files_path, tool_directory, specie_folder, tracks_folder):
+        Datatype.extra_files_path = extra_files_path
+        Datatype.tool_directory = tool_directory

-        # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator?
+        # TODO: All this should be in TrackHub and not in Datatype
+        Datatype.mySpecieFolderPath = specie_folder
+        Datatype.myTrackFolderPath = tracks_folder
+
+        Datatype.input_fasta_file = reference_genome
+
         # 2bit file creation from input fasta
-        if not Datatype.twoBitFile:
-            print "We create the self.twoBit in " + self.__class__.__name__
-            Datatype.twoBitFile = subtools.faToTwoBit(self.input_fasta_file, self.mySpecieFolderPath)
+        Datatype.twoBitFile = two_bit_path
+        Datatype.chromSizesFile = chrom_sizes_file

-        # TODO: Remove this by saying to all children classes to use "Datatype.twoBitFile" instead
-        self.twoBitFile = Datatype.twoBitFile
+    @staticmethod
+    def get_largest_scaffold_name(self=None):
+        """Return the first field of the first line of the chrom.sizes file.
+
+        The file is opened through Datatype.chromSizesFile.name. The original
+        author notes that this first entry is the biggest scaffold; that
+        presumably relies on the file being sorted by size -- TODO confirm
+        against subtools.sortChromSizes.
+
+        `self` is unused. It is kept, with a default value, so that existing
+        one-argument calls keep working while the static method also becomes
+        callable without any argument.
+
+        Raises:
+            IndexError: if the first line of the file is empty.
+        """
+        with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes:
+            # TODO: Check if exists
+            return chrom_sizes.readline().split()[0]

+    # TODO: Rename for PEP8
     def getShortName( self, name_to_shortify ):
         # Slice to get from Long label the short label
         short_label_slice = slice(0, 15)

-        return name_to_shortify[short_label_slice]
\ No newline at end of file
+        return name_to_shortify[short_label_slice]
Binary file Datatype.pyc has changed
--- a/Gff3.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/Gff3.py	Thu Jul 21 05:58:51 2016 -0400
@@ -11,11 +11,8 @@


 class Gff3( Datatype ):
-    def __init__( self, input_Gff3_false_path, data_gff3,
-                  input_fasta_false_path, extra_files_path, tool_directory ):
-        super( Gff3, self ).__init__(
-                input_fasta_false_path, extra_files_path, tool_directory
-        )
+    def __init__(self, input_Gff3_false_path, data_gff3):
+        super( Gff3, self ).__init__()

         self.track = None

@@ -29,8 +26,6 @@
         sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")

         # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py)
-        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
-        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")

         # gff3ToGenePred processing
         subtools.gff3ToGenePred(self.input_Gff3_false_path, genePredFile.name)
@@ -42,19 +37,14 @@
         # Sort processing
         subtools.sort(unsortedBedFile.name, sortedBedFile.name)

-        # Generate the twoBitInfo
-        subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
-
-        # Then we get the output to generate the chromSizes
         # TODO: Check if no errors
-        subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)

         # bedToBigBed processing
         # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
         trackName = "".join( (self.name_gff3, ".bb" ) )
         myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
         with open(myBigBedFilePath, 'w') as bigBedFile:
-            subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
+            subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name)

         # Create the Track Object
         dataURL = "tracks/%s" % trackName
Binary file Gff3.pyc has changed
--- a/Gtf.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/Gtf.py	Thu Jul 21 05:58:51 2016 -0400
@@ -11,11 +11,9 @@


 class Gtf( Datatype ):
-    def __init__( self, input_gtf_false_path, data_gtf,
-                 input_fasta_file, extra_files_path, tool_directory ):
-        super(Gtf, self).__init__( input_fasta_file=input_fasta_file,
-                                   extra_files_path=extra_files_path,
-                                   tool_directory=tool_directory )
+    def __init__( self, input_gtf_false_path, data_gtf):
+
+        super(Gtf, self).__init__()

         self.track = None

@@ -30,9 +28,6 @@
         unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
         sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")

-        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
-        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
-
         # GtfToGenePred
         subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

@@ -43,20 +38,12 @@
         # Sort processing
         subtools.sort(unsortedBedFile.name, sortedBedFile.name)

-        # TODO: Chehck if the twoBitInfo / ChromSizes is redundant and make an intermediate class
-        # Generate the twoBitInfo
-        subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
-
-        # Then we get the output to generate the chromSizes
-        # TODO: Check if no errors
-        subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
-
         # bedToBigBed processing
         # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
         trackName = "".join( ( self.name_gtf, ".bb") )
         myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
         with open(myBigBedFilePath, 'w') as bigBedFile:
-            subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
+            subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name)

         # Create the Track Object
         dataURL = "tracks/%s" % trackName
Binary file Gtf.pyc has changed
--- a/TrackHub.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/TrackHub.py	Thu Jul 21 05:58:51 2016 -0400
@@ -2,21 +2,35 @@
 # -*- coding: utf8 -*-

 import os
+import tempfile
+import shutil
 import zipfile

+# Internal dependencies
+from Datatype import Datatype
+from util import subtools
+
 from mako.lookup import TemplateLookup


 class TrackHub(object):
     """docstring for TrackHub"""

-    def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory):
+    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
         super(TrackHub, self).__init__()

         self.rootAssemblyHub = None
+
         self.mySpecieFolderPath = None
+        self.myTracksFolderPath = None
         self.tool_directory = tool_directory

+        self.reference_genome = inputFastaFile
+        # TODO: Add the specie name
+        self.genome_name = inputFastaFile.assembly_id
+        self.default_pos = None
+        self.user_email = user_email
+
         # TODO: Modify according to the files passed in parameter
         mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')],
                                   output_encoding='utf-8', encoding_errors='replace')
@@ -25,17 +39,21 @@
         self.extra_files_path = extra_files_path
         self.outputFile = outputFile

-        inputFastaFile = open(inputFastaFile, 'r')
-        #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True)
-
         # Create the structure of the Assembly Hub
         # TODO: Merge the following processing into a function as it is also used in twoBitCreator
-        baseNameFasta = os.path.basename(inputFastaFile.name)
-        suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
-        self.twoBitName = suffixTwoBit + '.2bit'
+        self.twoBitName = None
+        self.two_bit_final_path = None
+        self.chromSizesFile = None
+
+        self.default_pos = None

-        self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory,
-                                                          extra_files_path=extra_files_path)
+        # Set all the missing variables of this class, and create physically the folders/files
+        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
+
+        # Init the Datatype
+        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
+                          self.extra_files_path, self.tool_directory,
+                          self.mySpecieFolderPath, self.myTracksFolderPath)

     def createZip(self):
         for root, dirs, files in os.walk(self.rootAssemblyHub):
@@ -60,124 +78,149 @@

     def terminate(self):
         # Just a test to output a simple HTML
+        # TODO: Create a class to handle the file object
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+
+        mytemplate = mylookup.get_template('display.txt')
         with open(self.outputFile, 'w') as htmlOutput:
-            htmlOutput.write('<html>')
-            htmlOutput.write('<body>')
-            htmlOutput.write('<p>')
-            htmlOutput.write('The following has been generated by Hub Archive Creator:')
-            htmlOutput.write('</p>')
-            htmlOutput.write('<ul>')
+            # TODO: We are basically looping two times: One time with os.walk, Second time
+            # with the template. We could improve that if the number of files begins to be really important
+            list_relative_file_path = [ ]
             for root, dirs, files in os.walk(self.extra_files_path):
                 for file in files:
-                    relDir = os.path.relpath(root, self.extra_files_path)
-                    htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file),
-                                                os.path.join(relDir, file)))
-            htmlOutput.write('<ul>')
-            htmlOutput.write('</body>')
-            htmlOutput.write('</html>')
+                    relative_directory = os.path.relpath(root, self.extra_files_path)
+                    relative_file_path = os.path.join(relative_directory, file)
+                    list_relative_file_path.append(relative_file_path)
+
+            htmlMakoRendered = mytemplate.render(
+                list_relative_file_path=list_relative_file_path
+            )
+            htmlOutput.write(htmlMakoRendered)
+
+    def __createAssemblyHub__(self, extra_files_path):
+        # Get all necessaries infos first
+        # 2bit file creation from input fasta

-    def __createAssemblyHub__(self, toolDirectory, extra_files_path):
+        # baseNameFasta = os.path.basename(fasta_file_name)
+        # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+        # nameTwoBit = suffixTwoBit + '.2bit'
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)
+
+        # Generate the twoBitInfo
+        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)
+
+        # Then we get the output to generate the chromSizes
+        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+        subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
+
+        # We can get the biggest scaffold here, with chromSizesFile
+        with open(self.chromSizesFile.name, 'r') as chrom_sizes:
+            # TODO: Check if exists
+            self.default_pos = chrom_sizes.readline().split()[0]
+
         # TODO: Manage to put every fill Function in a file dedicated for reading reasons
         # Create the root directory
         myHubPath = os.path.join(extra_files_path, "myHub")
         if not os.path.exists(myHubPath):
             os.makedirs(myHubPath)

+        # Create the specie folder
+        # TODO: Generate the name depending on the specie
+        mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
+        if not os.path.exists(mySpecieFolderPath):
+            os.makedirs(mySpecieFolderPath)
+        self.mySpecieFolderPath = mySpecieFolderPath
+
+        # We create the 2bit file while we just created the specie folder
+        self.twoBitName = self.genome_name + ".2bit"
+        self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
+        shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
+
         # Add the genomes.txt file
         genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt')
-        self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory)
+        self.__fillGenomesTxt__(genomesTxtFilePath)

         # Add the hub.txt file
         hubTxtFilePath = os.path.join(myHubPath, 'hub.txt')
-        self.__fillHubTxt__(hubTxtFilePath, toolDirectory)
+        self.__fillHubTxt__(hubTxtFilePath)

         # Add the hub.html file
         # TODO: Change the name and get it depending on the specie
         hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html')
-        self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory)
+        self.__fillHubHtmlFile__(hubHtmlFilePath)

-        # Create the specie folder
-        # TODO: Generate the name depending on the specie
-        mySpecieFolderPath = os.path.join(myHubPath, "dbia3")
-        if not os.path.exists(mySpecieFolderPath):
-            os.makedirs(mySpecieFolderPath)
-        self.mySpecieFolderPath = mySpecieFolderPath

         # Create the description html file in the specie folder
         descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html')
-        self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory)
+        self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath)

         # Create the file groups.txt
         # TODO: If not inputs for this, do no create the file
         groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
-        self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory)
+        self.__fillGroupsTxtFile__(groupsTxtFilePath)

         # Create the folder tracks into the specie folder
         tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
         if not os.path.exists(tracksFolderPath):
             os.makedirs(tracksFolderPath)
+        self.myTracksFolderPath = tracksFolderPath

         return myHubPath

-    def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory):
+    def __fillGenomesTxt__(self, genomesTxtFilePath):
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
         # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly")
-        pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
+        pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly')
         mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(genomesTxtFilePath, 'w') as genomesTxtFile:
             # Write the content of the file genomes.txt
-            twoBitPath = os.path.join('dbia3/', self.twoBitName)
+            twoBitPath = os.path.join(self.genome_name, self.twoBitName)
             htmlMakoRendered = mytemplate.render(
-                genomeName="dbia3",
-                trackDbPath="dbia3/trackDb.txt",
-                groupsPath="dbia3/groups.txt",
-                genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold",
+                genomeName=self.genome_name,
+                trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
+                groupsPath=os.path.join(self.genome_name, "groups.txt"),
+                genomeDescription=self.genome_name,
                 twoBitPath=twoBitPath,
-                organismName="Drosophilia biarmipes",
-                defaultPosition="contig1",
+                organismName=self.genome_name,
+                defaultPosition=self.default_pos,
                 orderKey="4500",
-                scientificName="Drosophilia biarmipes",
-                pathAssemblyHtmlDescription="dbia3/description.html"
+                scientificName=self.genome_name,
+                pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
             )
             genomesTxtFile.write(htmlMakoRendered)

-    def __fillHubTxt__(self, hubTxtFilePath, toolDirectory):
+    def __fillHubTxt__(self, hubTxtFilePath):
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template('layout.txt')
         with open(hubTxtFilePath, 'w') as genomesTxtFile:
             # Write the content of the file genomes.txt
             htmlMakoRendered = mytemplate.render(
-                hubName='dbiaOnly',
-                shortLabel='dbia',
-                longLabel='This hub only contains dbia with the gene predictions',
+                hubName=(''.join(['gonramp', self.genome_name.title()])),
+                shortLabel=self.genome_name,
+                longLabel=self.genome_name,
                 genomesFile='genomes.txt',
-                email='rmarenco@gwu.edu',
+                email=self.user_email,
                 descriptionUrl='dbia.html'
             )
             genomesTxtFile.write(htmlMakoRendered)

-    def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory):
+    def __fillHubHtmlFile__(self, hubHtmlFilePath):
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
         # renderer = pystache.Renderer(search_dirs="templates/hubDescription")
         # t = Template(templates.hubDescription.layout.html)
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(hubHtmlFilePath, 'w') as hubHtmlFile:
-            # Write the content of the file genomes.txt
-            # htmlPystached = renderer.render_name(
-            #     "layout",
-            #     {'specie': 'Dbia',
-            #     'toolUsed': 'Augustus',
-            #     'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
-            #     'genomeID': '3499',
-            #     'SpecieFullName': 'Drosophila biarmipes'})
             htmlMakoRendered = mytemplate.render(
                 specie='Dbia',
                 toolUsed='Augustus',
@@ -185,13 +228,12 @@
                 genomeID='3499',
                 specieFullName='Drosophila biarmipes'
             )
-            # hubHtmlFile.write(htmlPystached)
-            hubHtmlFile.write(htmlMakoRendered)
+            #hubHtmlFile.write(htmlMakoRendered)

-    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory):
+    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
@@ -199,11 +241,11 @@
             htmlMakoRendered = mytemplate.render(
                 specieDescription='This is the description of the dbia',
             )
-            descriptionHtmlFile.write(htmlMakoRendered)
+            #descriptionHtmlFile.write(htmlMakoRendered)

-    def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory):
+    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
         # TODO: Reenable this function at some point
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(groupsTxtFilePath, 'w') as groupsTxtFile:
Binary file TrackHub.pyc has changed
--- a/hubArchiveCreator.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/hubArchiveCreator.py	Thu Jul 21 05:58:51 2016 -0400
@@ -14,13 +14,14 @@
 import sys

 # Internal dependencies
-from TrackHub import TrackHub
-from Gff3 import Gff3
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
 from Bed import Bed
 from BigWig import BigWig
+from util.Fasta import Fasta
+from Gff3 import Gff3
 from Gtf import Gtf
+from TrackHub import TrackHub


 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
@@ -62,6 +63,10 @@

     parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

+    parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')
+
+    parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')
+
     ucsc_tools_path = ''

     toolDirectory = '.'
@@ -70,11 +75,20 @@
     # Get the args passed in parameter
     args = parser.parse_args()

-    input_fasta_file = args.fasta
+    array_inputs_reference_genome = json.loads(args.fasta)
+
+    # TODO: Replace these with the object Fasta
+    input_fasta_file = array_inputs_reference_genome["false_path"]
+    input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
+    genome_name = sanitize_name_input(args.genome_name)
+
+    reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+                             input_fasta_file_name, genome_name)
+
+    user_email = args.user_email

     # TODO: Add array for each input because we can add multiple -b for example + filter the data associated

-
     array_inputs_gff3 = args.gff3
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
     array_inputs_bed_generic = args.bed
@@ -96,42 +110,38 @@
     if args.extra_files_path:
         extra_files_path = args.extra_files_path

-    # TODO: Check here all the binaries / tools we need. Exception is missing
+    # TODO: Check here all the binaries / tools we need. Exception if missing

     # Create the Track Hub folder
-    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+    trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)

     all_datatype_dictionary = {}

+    datatype_parameters = (inputs_data, all_datatype_dictionary)
+
     # Process Augustus
     if array_inputs_gff3:
-        create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)

-    # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+    # Process Bed simple repeats
     if array_inputs_bed_simple_repeats:
-        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)

-    # Process a Bed => tBlastN or TopHat
+    # Process Bed
     if array_inputs_bed_generic:
-        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)

-    # Process a GTF => Tophat
+    # Process GTF
     if array_inputs_gtf:
-        create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)

-    # Process a Bam => Tophat
+    # Process Bam
     if array_inputs_bam:
-        create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)

-    # Process a BigWig => From Bam
+    # Process BigWig
     if array_inputs_bigwig:
-        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)

     # Create Ordered Dictionary to add the tracks in the tool form order
     all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
@@ -147,6 +157,10 @@

     sys.exit(0)

+def sanitize_name_input(string_to_sanitize):
+    """Return string_to_sanitize with each "/" and " " replaced by "_"."""
+    without_slashes = string_to_sanitize.replace("/", "_")
+    return without_slashes.replace(" ", "_")

 def sanitize_name_inputs(inputs_data):
     """
@@ -156,22 +170,16 @@
     :return:
     """
     for key in inputs_data:
-        inputs_data[key]["name"] = inputs_data[key]["name"]\
-            .replace("/", "_")\
-            .replace(" ", "_")
+        inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])


-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, tool_directory):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
     """
     Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
     and update the dictionary of datatype
     :param ExtensionClass: T <= Datatype
     :param array_inputs: list[string]
     :param inputs_data:
-    :param input_fasta_file: string
-    :param extra_files_path: string
-    :param tool_directory; string
     """

     datatype_dictionary = {}
@@ -180,8 +188,8 @@
     for input_false_path in array_inputs:
         for key, data_value in inputs_data.items():
             if key == input_false_path:
-                extensionObject = ExtensionClass(input_false_path, data_value,
-                                                 input_fasta_file, extra_files_path, tool_directory)
+                extensionObject = ExtensionClass(input_false_path, data_value)
+
                 datatype_dictionary.update({data_value["order_index"]: extensionObject})
     all_datatype_dictionary.update(datatype_dictionary)
--- a/hubArchiveCreator.xml	Wed Jul 20 12:29:08 2016 -0400
+++ b/hubArchiveCreator.xml	Thu Jul 21 05:58:51 2016 -0400
@@ -31,6 +31,9 @@
         mkdir -p $output.extra_files_path;
         python $__tool_directory__/hubArchiveCreator.py

+        ## Ask the user to enter the genome name
+        --genome_name '$genome_name'
+
         #import json

         #set global data_parameter_dict = {}
@@ -81,19 +84,32 @@
             #end if
         #end for

+        ## We combine the fasta file dataset name with its false path in a JSON object
+        #set fasta_json = json.dumps({"false_path": str($fasta_file), "name": $fasta_file.name})
+        -f '$fasta_json'
+
         ## Dump the final json
         #set all_data_json = json.dumps($data_parameter_dict)

-        -f $Fasta_File
         --data_json '$all_data_json'

+        ## Retrieve the user email
+        --user_email $__user_email__
+
         -d $__tool_directory__ -e $output.files_path -o $output;
     ]]></command>

     <inputs>
         <param
+                name="genome_name"
+                type="text"
+                size="30"
+                value="unknown"
+                label="UCSC Genome Browser assembly ID"
+        />
+        <param
                 format="fasta"
-                name="Fasta_File"
+                name="fasta_file"
                 type="data"
                 label="Reference genome"
         />
@@ -175,7 +191,7 @@
         <!-- Can also use assert_command to test command -->
         <!-- Testing GFF3 input -->
         <test>
-            <param name="Fasta_File" value="dbia3.fa"/>
+            <param name="fasta_file" value="dbia3.fa"/>
             <repeat name="format">
                 <conditional name="formatChoice">
                     <param name="format_select" value="gff3"/>
@@ -209,7 +225,7 @@
             </output>
         </test>
         <test>
-            <param name="Fasta_File" value="dbia3.fa"/>
+            <param name="fasta_file" value="dbia3.fa"/>
             <param name="GFF3" value="augustusDbia3.gff3"/>
             <output name="output" file="augustusOutput.html" lines_diff="2">
                 <extra_files type="directory" value="myHub"/>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/display.txt	Thu Jul 21 05:58:51 2016 -0400
@@ -0,0 +1,15 @@
+<%namespace name="os" module="os"/>
+<html>
+    <body>
+        <p>
+            The following has been generated by Hub Archive Creator:
+        </p>
+        <ul>
+            % for relative_file_path in list_relative_file_path:
+                <li>
+                    <a href="${relative_file_path | h}">${relative_file_path | h}</a>
+                </li>
+            % endfor
+        </ul>
+    </body>
+</html>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util/Fasta.py	Thu Jul 21 05:58:51 2016 -0400
@@ -0,0 +1,16 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Class describing the Fasta format
+(As of the 07/20/2016, only used with the reference genome)
+"""
+
+class Fasta(object):
+    """Fasta dataset described by its false path, name and assembly ID."""
+
+    def __init__(self, false_path, name, assembly_id):
+        self.name = name
+        self.false_path = false_path
+        # Any falsy assembly ID (None, "") falls back to "unknown".
+        self.assembly_id = assembly_id or "unknown"
\ No newline at end of file
Binary file util/Fasta.pyc has changed
--- a/util/subtools.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/util/subtools.py	Thu Jul 21 05:58:51 2016 -0400
@@ -39,20 +39,16 @@
     return p


-def faToTwoBit(fasta_file_name, mySpecieFolder):
+def faToTwoBit(fasta_file_name, twoBitFile):
     """
     This function call faToTwoBit UCSC tool, and return the twoBitFile
     :param fasta_file_name:
-    :param mySpecieFolder:
+    :param twoBitFile: passed to faToTwoBit as its last argument and returned unchanged
     :return:
     """
-    baseNameFasta = os.path.basename(fasta_file_name)
-    suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
-    nameTwoBit = suffixTwoBit + '.2bit'

-    with open(os.path.join(mySpecieFolder, nameTwoBit), 'w') as twoBitFile:
-        array_call = ['faToTwoBit', fasta_file_name, twoBitFile.name]
-        _handleExceptionAndCheckCall(array_call)
+    array_call = ['faToTwoBit', fasta_file_name, twoBitFile]
+    _handleExceptionAndCheckCall(array_call)

     return twoBitFile
Binary file util/subtools.pyc has changed