diff TrackHub.py @ 0:f493979f1408 draft default tip

planemo upload for repository https://github.com/Yating-L/hubarchivecreator-test commit 48b59e91e2dcc2e97735ee35d587960cbfbce932-dirty
author yating-l
date Wed, 21 Dec 2016 12:13:04 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TrackHub.py	Wed Dec 21 12:13:04 2016 -0500
@@ -0,0 +1,329 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import logging
+import os
+import tempfile
+import shutil
+import zipfile
+
+# Internal dependencies
+from Datatype import Datatype
+from util import subtools
+
+from mako.lookup import TemplateLookup
+
+
+class TrackHub(object):
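+    """Builds an Assembly Hub on disk for one reference genome.
+
+    Creating an instance writes the "myHub" folder (hub.txt, genomes.txt, the
+    genome's 2bit file and its "tracks" folder). addTrack() and addGroup()
+    then append to trackDb.txt and groups.txt, and terminate() writes the
+    HTML listing of the generated files.
+    """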
+
+    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
+        super(TrackHub, self).__init__()
+
+        self.rootAssemblyHub = None
+
+        self.mySpecieFolderPath = None
+        self.myTracksFolderPath = None
+        self.tool_directory = tool_directory
+
+        self.reference_genome = inputFastaFile
+        # TODO: Add the specie name
+        self.genome_name = inputFastaFile.assembly_id
+        self.specie_html = self.genome_name + '.html'
+        self.default_pos = None
+        self.user_email = user_email
+
+        # Set containing the groups already added. Updated by addGroup()
+        self.groups = set()
+
+        # TODO: Modify according to the files passed in parameter
+        # ---- Templates ----
+        # Template trackDb
+        mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+        self.trackDbTemplate = mylookup.get_template("layout.txt")
+
+        # Template groups
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+        self.groupsTemplate = mylookup.get_template("layout.txt")
+
+        # ---- End Templates ----
+
+        self.extra_files_path = extra_files_path
+        self.outputFile = outputFile
+
+        # Create the structure of the Assembly Hub
+        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
+        self.twoBitName = None
+        self.two_bit_final_path = None
+        self.chromSizesFile = None
+
+        self.default_pos = None
+
+        # Set all the missing variables of this class, and create physically the folders/files
+        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
+
+        # Init the Datatype
+        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
+                          self.extra_files_path, self.tool_directory,
+                          self.mySpecieFolderPath, self.myTracksFolderPath)
+
+    def createZip(self):
+        for root, dirs, files in os.walk(self.rootAssemblyHub):
+            # Get all files and construct the dir at the same time
+            for file in files:
+                self.outputZip.write(os.path.join(root, file))
+
+        self.outputZip.close()
+
+    def addTrack(self, trackDbObject=None):
+        # Create the trackDb.txt file in the specie folder, if not exists
+        # Else append the new track
+        # TODO: Get this out of the function
+        trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, 'trackDb.txt')
+
+        # Append to trackDbTxtFilePath the trackDbTemplate populate with the newTrack object
+        with open(trackDbTxtFilePath, 'a+') as trackDbFile:
+            trackDbs = [trackDbObject]
+
+            # TODO: The addGroup does not belong here. Move it when the group becomes more than just a label
+            # Add the group as well, if exists in trackDbObject
+            self.addGroup(trackDbObject.group_name)
+
+            htmlMakoRendered = self.trackDbTemplate.render(
+                trackDbs=trackDbs
+            )
+            trackDbFile.write(htmlMakoRendered)
+
+        logging.debug("We just added track {0} (in group {1})".format(trackDbObject.trackName,
+                                                                  trackDbObject.group_name.lower().replace(' ', '_')))
+
+    def addGroup(self, group_name="Default"):
+        # If not already present in self.groups, add to groups.txt
+        # Create the trackDb.txt file in the specie folder, if not exists
+        # Else append the new track
+        # TODO: Get this out of the function
+        groupsTxtFilePath = os.path.join(self.mySpecieFolderPath, 'groups.txt')
+
+        # If the group is already present, we don't need to add it
+        if group_name in self.groups:
+            logging.debug("We DON'T add in {0} the group {1}".format(groupsTxtFilePath,
+                                                                      group_name))
+            return
+
+        # Append to trackDbTxtFilePath the trackDbTemplate populate with the newTrack object
+        with open(groupsTxtFilePath, 'a+') as groupFile:
+            # Add the group as well, if exists in trackDbObject
+
+            htmlMakoRendered = self.groupsTemplate.render(
+                    label=group_name
+            )
+            groupFile.write(htmlMakoRendered)
+        logging.debug("We just added in {0} the group {1}".format(groupsTxtFilePath,
+                                                                  group_name))
+        self.groups.add(group_name)
+
+    def terminate(self):
+        # Just a test to output a simple HTML
+        # TODO: Create a class to handle the file object
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+
+        mytemplate = mylookup.get_template('display.txt')
+        with open(self.outputFile, 'w') as htmlOutput:
+            # TODO: We are basically looping two times: One time with os.walk, Second time
+            # with the template. We could improve that if the number of files begins to be really important
+            list_relative_file_path = [ ]
+
+            # TODO: Create classes Tree to manage this => Better readibility and maintenability
+            def create_tree(array_path, tree, relative_array_file_path, level=0):
+                cur_relative_file_path = '/'.join(relative_array_file_path[:level+1])
+                if array_path[0] in tree.keys():
+                    create_tree(array_path[1:], tree[array_path[0]][0],
+                                relative_array_file_path, level+1)
+                else:
+                    tree[array_path[0]] = ({}, cur_relative_file_path)
+                    # TODO: Manage also the links of the directories => No link?
+                    # => Managed in display.txt, but could also be managed there
+                    # If we are don't have any sub-vertices
+                    if len(array_path) == 1:
+                        # We create the path to it
+                        return
+                    else:
+                        create_tree(array_path[1:], tree[array_path[0]][0],
+                                    relative_array_file_path, level + 1)
+
+            walkable_tree = {}
+            for root, dirs, files in os.walk(self.extra_files_path):
+                # Prepare the tree from to perform a Depth First Search
+                for file in files:
+                    relative_directory = os.path.relpath(root, self.extra_files_path)
+                    relative_file_path = os.path.join(relative_directory, file)
+                    array_path = relative_file_path.split('/')
+                    create_tree(array_path, walkable_tree, array_path, 0)
+
+            htmlMakoRendered = mytemplate.render(
+                walkable_tree=walkable_tree
+            )
+            htmlOutput.write(htmlMakoRendered)
+
+    def __createAssemblyHub__(self, extra_files_path):
+        # Get all necessaries infos first
+        # 2bit file creation from input fasta
+
+        # baseNameFasta = os.path.basename(fasta_file_name)
+        # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+        # nameTwoBit = suffixTwoBit + '.2bit'
+        twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)
+
+        # Generate the twoBitInfo
+        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+        subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)
+
+        # Then we get the output to generate the chromSizes
+        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+        subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
+
+        # We can get the biggest scaffold here, with chromSizesFile
+        with open(self.chromSizesFile.name, 'r') as chrom_sizes:
+            # TODO: Check if exists
+            self.default_pos = chrom_sizes.readline().split()[0]
+
+        # TODO: Manage to put every fill Function in a file dedicated for reading reasons
+        # Create the root directory
+        myHubPath = os.path.join(extra_files_path, "myHub")
+        if not os.path.exists(myHubPath):
+            os.makedirs(myHubPath)
+
+        # Create the specie folder
+        # TODO: Generate the name depending on the specie
+        mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
+        if not os.path.exists(mySpecieFolderPath):
+            os.makedirs(mySpecieFolderPath)
+        self.mySpecieFolderPath = mySpecieFolderPath
+
+        # We create the 2bit file while we just created the specie folder
+        self.twoBitName = self.genome_name + ".2bit"
+        self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
+        shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
+
+        # Add the genomes.txt file
+        genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt')
+        self.__fillGenomesTxt__(genomesTxtFilePath)
+
+        # Add the hub.txt file
+        hubTxtFilePath = os.path.join(myHubPath, 'hub.txt')
+        self.__fillHubTxt__(hubTxtFilePath)
+
+        # Add the hub.html file
+        hubHtmlFilePath = os.path.join(myHubPath, self.specie_html)
+        self.__fillHubHtmlFile__(hubHtmlFilePath)
+
+
+        # Create the description html file in the specie folder
+        descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html')
+        self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath)
+
+        # Create the file groups.txt
+        # TODO: If not inputs for this, do no create the file
+        # groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
+        # self.__fillGroupsTxtFile__(groupsTxtFilePath)
+
+        # Create the folder tracks into the specie folder
+        tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
+        if not os.path.exists(tracksFolderPath):
+            os.makedirs(tracksFolderPath)
+        self.myTracksFolderPath = tracksFolderPath
+
+        return myHubPath
+
+    def __fillGenomesTxt__(self, genomesTxtFilePath):
+        # TODO: Think about the inputs and outputs
+        # TODO: Manage the template of this file
+        # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly")
+        pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly')
+        mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
+        mytemplate = mylookup.get_template("layout.txt")
+        with open(genomesTxtFilePath, 'w') as genomesTxtFile:
+            # Write the content of the file genomes.txt
+            twoBitPath = os.path.join(self.genome_name, self.twoBitName)
+            htmlMakoRendered = mytemplate.render(
+                genomeName=self.genome_name,
+                trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
+                groupsPath=os.path.join(self.genome_name, "groups.txt"),
+                genomeDescription=self.genome_name,
+                twoBitPath=twoBitPath,
+                organismName=self.genome_name,
+                defaultPosition=self.default_pos,
+                orderKey="4500",
+                scientificName=self.genome_name,
+                pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
+            )
+            genomesTxtFile.write(htmlMakoRendered)
+
+    def __fillHubTxt__(self, hubTxtFilePath):
+        # TODO: Think about the inputs and outputs
+        # TODO: Manage the template of this file
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+        mytemplate = mylookup.get_template('layout.txt')
+        with open(hubTxtFilePath, 'w') as genomesTxtFile:
+            # Write the content of the file genomes.txt
+            htmlMakoRendered = mytemplate.render(
+                hubName=(''.join(['gonramp', self.genome_name.title()])),
+                shortLabel=self.genome_name,
+                longLabel=self.genome_name,
+                genomesFile='genomes.txt',
+                email=self.user_email,
+                descriptionUrl=self.specie_html
+            )
+            genomesTxtFile.write(htmlMakoRendered)
+
+    def __fillHubHtmlFile__(self, hubHtmlFilePath):
+        # TODO: Think about the inputs and outputs
+        # TODO: Manage the template of this file
+        # renderer = pystache.Renderer(search_dirs="templates/hubDescription")
+        # t = Template(templates.hubDescription.layout.html)
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+        mytemplate = mylookup.get_template("layout.txt")
+        with open(hubHtmlFilePath, 'w') as hubHtmlFile:
+            htmlMakoRendered = mytemplate.render(
+                specie='Dbia',
+                toolUsed='Augustus',
+                ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
+                genomeID='3499',
+                specieFullName='Drosophila biarmipes'
+            )
+            #hubHtmlFile.write(htmlMakoRendered)
+
+    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
+        # TODO: Think about the inputs and outputs
+        # TODO: Manage the template of this file
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+        mytemplate = mylookup.get_template("layout.txt")
+        with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
+            # Write the content of the file genomes.txt
+            htmlMakoRendered = mytemplate.render(
+                specieDescription='This is the description of the dbia',
+            )
+            #descriptionHtmlFile.write(htmlMakoRendered)
+
+    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
+        # TODO: Reenable this function at some point
+        mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')],
+                                  output_encoding='utf-8', encoding_errors='replace')
+        mytemplate = mylookup.get_template("layout.txt")
+        with open(groupsTxtFilePath, 'w') as groupsTxtFile:
+            # Write the content of groups.txt
+            # groupsTxtFile.write('name map')
+            htmlMakoRendered = mytemplate.render(
+                mapName='map',
+                labelMapping='Mapping',
+                prioriy='2',
+                isClosed='0'
+            )
+            # groupsTxtFile.write(htmlMakoRendered)