diff TrackHub.py @ 10:acc233161f50 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author rmarenco
date Thu, 21 Jul 2016 05:58:51 -0400
parents 4f9847539a28
children d05236b15f81
line wrap: on
line diff
--- a/TrackHub.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/TrackHub.py	Thu Jul 21 05:58:51 2016 -0400
@@ -2,21 +2,35 @@
 # -*- coding: utf8 -*-
 
 import os
+import tempfile
+import shutil
 import zipfile
 
+# Internal dependencies
+from Datatype import Datatype
+from util import subtools
+
 from mako.lookup import TemplateLookup
 
 
 class TrackHub(object):
     """docstring for TrackHub"""
 
-    def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory):
+    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
         super(TrackHub, self).__init__()
 
         self.rootAssemblyHub = None
+
         self.mySpecieFolderPath = None
+        self.myTracksFolderPath = None
         self.tool_directory = tool_directory
 
+        self.reference_genome = inputFastaFile
+        # TODO: Add the specie name
+        self.genome_name = inputFastaFile.assembly_id
+        self.default_pos = None
+        self.user_email = user_email
+
         # TODO: Modify according to the files passed in parameter
         mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')],
                                   output_encoding='utf-8', encoding_errors='replace')
@@ -25,17 +39,21 @@
         self.extra_files_path = extra_files_path
         self.outputFile = outputFile
 
-        inputFastaFile = open(inputFastaFile, 'r')
-        #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True)
-
         # Create the structure of the Assembly Hub
         # TODO: Merge the following processing into a function as it is also used in twoBitCreator
-        baseNameFasta = os.path.basename(inputFastaFile.name)
-        suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
-        self.twoBitName = suffixTwoBit + '.2bit'
+        self.twoBitName = None
+        self.two_bit_final_path = None
+        self.chromSizesFile = None
+
+        self.default_pos = None
 
-        self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory,
-                                                          extra_files_path=extra_files_path)
+        # Set all the missing variables of this class, and create physically the folders/files
+        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
+
+        # Init the Datatype
+        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
+                          self.extra_files_path, self.tool_directory,
+                          self.mySpecieFolderPath, self.myTracksFolderPath)
 
     def createZip(self):
         for root, dirs, files in os.walk(self.rootAssemblyHub):
@@ -60,124 +78,149 @@
 
     def terminate(self):
         # Provisional output: write self.outputFile from the files found under self.extra_files_path
+        # TODO: Create a class to handle the file object
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+
+        mytemplate = mylookup.get_template('display.txt')  # looked up under <tool_directory>/templates
         with open(self.outputFile, 'w') as htmlOutput:
-            htmlOutput.write('<html>')
-            htmlOutput.write('<body>')
-            htmlOutput.write('<p>')
-            htmlOutput.write('The following has been generated by Hub Archive Creator:')
-            htmlOutput.write('</p>')
-            htmlOutput.write('<ul>')
+            # TODO: The files are traversed twice: once by os.walk here, once by the template.
+            # This could be improved if the number of files becomes really large
+            list_relative_file_path = [ ]
             for root, dirs, files in os.walk(self.extra_files_path):
                 for file in files:  # NOTE(review): 'file' shadows a builtin name on Python 2
-                    relDir = os.path.relpath(root, self.extra_files_path)
-                    htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file),
-                                                os.path.join(relDir, file)))
-            htmlOutput.write('<ul>')
-            htmlOutput.write('</body>')
-            htmlOutput.write('</html>')
+                    relative_directory = os.path.relpath(root, self.extra_files_path)  # '.' when root is extra_files_path itself
+                    relative_file_path = os.path.join(relative_directory, file)
+                    list_relative_file_path.append(relative_file_path)
+
+            htmlMakoRendered = mytemplate.render(
+                list_relative_file_path=list_relative_file_path
+            )
+            htmlOutput.write(htmlMakoRendered)
+
+    def __createAssemblyHub__(self, extra_files_path):  # Builds <extra_files_path>/myHub, sets the path attributes, returns the hub root
+        # Gather all the necessary information first
+        # 2bit file creation from input fasta
 
-    def __createAssemblyHub__(self, toolDirectory, extra_files_path):
+        # baseNameFasta = os.path.basename(fasta_file_name)
+        # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+        # nameTwoBit = suffixTwoBit + '.2bit'
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)  # NOTE(review): 'bufsize' is the Python 2 keyword (Python 3 names it 'buffering')
+        subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)  # presumably wraps UCSC faToTwoBit -- verify in util/subtools
+
+        # Generate the twoBitInfo
+        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)
+
+        # Then we get the output to generate the chromSizes
+        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")  # kept on self, unlike the two temp files above
+        subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
+
+        # We can get the biggest scaffold here, with chromSizesFile (assumes sortChromSizes puts it first -- TODO confirm)
+        with open(self.chromSizesFile.name, 'r') as chrom_sizes:
+            # TODO: Check if exists -- NOTE(review): an empty chrom.sizes file makes split()[0] raise IndexError
+            self.default_pos = chrom_sizes.readline().split()[0]  # first whitespace-separated field of the first line
+
         # TODO: Move every fill function into a dedicated file, for readability
         # Create the root directory
         myHubPath = os.path.join(extra_files_path, "myHub")
         if not os.path.exists(myHubPath):
             os.makedirs(myHubPath)
 
+        # Create the species folder
+        # TODO: Generate the name depending on the species
+        mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
+        if not os.path.exists(mySpecieFolderPath):
+            os.makedirs(mySpecieFolderPath)
+        self.mySpecieFolderPath = mySpecieFolderPath
+
+        # Copy the 2bit file into the species folder we just created
+        self.twoBitName = self.genome_name + ".2bit"
+        self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
+        shutil.copyfile(twoBitFile.name, self.two_bit_final_path)  # twoBitFile is a NamedTemporaryFile; keep a permanent copy in the hub
+
         # Add the genomes.txt file
         genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt')
-        self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory)
+        self.__fillGenomesTxt__(genomesTxtFilePath)
 
         # Add the hub.txt file
         hubTxtFilePath = os.path.join(myHubPath, 'hub.txt')
-        self.__fillHubTxt__(hubTxtFilePath, toolDirectory)
+        self.__fillHubTxt__(hubTxtFilePath)
 
         # Add the hub.html file
         # TODO: Change the name and get it depending on the species
         hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html')
-        self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory)
+        self.__fillHubHtmlFile__(hubHtmlFilePath)
 
-        # Create the specie folder
-        # TODO: Generate the name depending on the specie
-        mySpecieFolderPath = os.path.join(myHubPath, "dbia3")
-        if not os.path.exists(mySpecieFolderPath):
-            os.makedirs(mySpecieFolderPath)
-        self.mySpecieFolderPath = mySpecieFolderPath
 
         # Create the description html file in the species folder
         descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html')
-        self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory)
+        self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath)
 
         # Create the file groups.txt
         # TODO: If there are no inputs for this, do not create the file
         groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
-        self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory)
+        self.__fillGroupsTxtFile__(groupsTxtFilePath)
 
         # Create the tracks folder inside the species folder
         tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
         if not os.path.exists(tracksFolderPath):
             os.makedirs(tracksFolderPath)
+        self.myTracksFolderPath = tracksFolderPath
 
         return myHubPath
 
-    def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory):
+    def __fillGenomesTxt__(self, genomesTxtFilePath):  # Renders templates/genomesAssembly/layout.txt into genomesTxtFilePath
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
         # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly")
-        pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
+        pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly')
         mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(genomesTxtFilePath, 'w') as genomesTxtFile:
             # Write the content of the file genomes.txt
-            twoBitPath = os.path.join('dbia3/', self.twoBitName)
+            twoBitPath = os.path.join(self.genome_name, self.twoBitName)  # relative path: <genome_name>/<twoBitName>
             htmlMakoRendered = mytemplate.render(
-                genomeName="dbia3",
-                trackDbPath="dbia3/trackDb.txt",
-                groupsPath="dbia3/groups.txt",
-                genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold",
+                genomeName=self.genome_name,
+                trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
+                groupsPath=os.path.join(self.genome_name, "groups.txt"),
+                genomeDescription=self.genome_name,  # NOTE(review): genome_name is reused for description, organism and scientific name
                 twoBitPath=twoBitPath,
-                organismName="Drosophilia biarmipes",
-                defaultPosition="contig1",
+                organismName=self.genome_name,
+                defaultPosition=self.default_pos,  # set in __createAssemblyHub__ from the first chrom.sizes line
                 orderKey="4500",  # NOTE(review): hard-coded value; its meaning is not visible here
-                scientificName="Drosophilia biarmipes",
-                pathAssemblyHtmlDescription="dbia3/description.html"
+                scientificName=self.genome_name,
+                pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
             )
             genomesTxtFile.write(htmlMakoRendered)
 
-    def __fillHubTxt__(self, hubTxtFilePath, toolDirectory):
+    def __fillHubTxt__(self, hubTxtFilePath):  # Renders templates/hubTxt/layout.txt into hubTxtFilePath
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template('layout.txt')
         with open(hubTxtFilePath, 'w') as genomesTxtFile:  # NOTE(review): despite its name, this handle writes hubTxtFilePath
             # Write the content of the hub file (hubTxtFilePath)
             htmlMakoRendered = mytemplate.render(
-                hubName='dbiaOnly',
-                shortLabel='dbia',
-                longLabel='This hub only contains dbia with the gene predictions',
+                hubName=(''.join(['gonramp', self.genome_name.title()])),  # e.g. genome_name 'dbia3' gives 'gonrampDbia3'
+                shortLabel=self.genome_name,
+                longLabel=self.genome_name,
                 genomesFile='genomes.txt',
-                email='rmarenco@gwu.edu',
+                email=self.user_email,
                 descriptionUrl='dbia.html'  # NOTE(review): still hard-coded; same name as the file created in __createAssemblyHub__
             )
             genomesTxtFile.write(htmlMakoRendered)
 
-    def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory):
+    def __fillHubHtmlFile__(self, hubHtmlFilePath):
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
         # renderer = pystache.Renderer(search_dirs="templates/hubDescription")
         # t = Template(templates.hubDescription.layout.html)
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(hubHtmlFilePath, 'w') as hubHtmlFile:
-            # Write the content of the file genomes.txt
-            # htmlPystached = renderer.render_name(
-            #     "layout",
-            #     {'specie': 'Dbia',
-            #     'toolUsed': 'Augustus',
-            #     'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
-            #     'genomeID': '3499',
-            #     'SpecieFullName': 'Drosophila biarmipes'})
             htmlMakoRendered = mytemplate.render(
                 specie='Dbia',
                 toolUsed='Augustus',
@@ -185,13 +228,12 @@
                 genomeID='3499',
                 specieFullName='Drosophila biarmipes'
             )
-            # hubHtmlFile.write(htmlPystached)
-            hubHtmlFile.write(htmlMakoRendered)
+            #hubHtmlFile.write(htmlMakoRendered)
 
-    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory):
+    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
         # TODO: Think about the inputs and outputs
         # TODO: Manage the template of this file
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
@@ -199,11 +241,11 @@
             htmlMakoRendered = mytemplate.render(
                 specieDescription='This is the description of the dbia',
             )
-            descriptionHtmlFile.write(htmlMakoRendered)
+            #descriptionHtmlFile.write(htmlMakoRendered)
 
-    def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory):
+    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
         # TODO: Reenable this function at some point
-        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')],
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')],
                                   output_encoding='utf-8', encoding_errors='replace')
         mytemplate = mylookup.get_template("layout.txt")
         with open(groupsTxtFilePath, 'w') as groupsTxtFile: