Mercurial > repos > rmarenco > hubarchivecreator
diff TrackHub.py @ 10:acc233161f50 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author | rmarenco |
---|---|
date | Thu, 21 Jul 2016 05:58:51 -0400 |
parents | 4f9847539a28 |
children | d05236b15f81 |
line wrap: on
line diff
--- a/TrackHub.py Wed Jul 20 12:29:08 2016 -0400 +++ b/TrackHub.py Thu Jul 21 05:58:51 2016 -0400 @@ -2,21 +2,35 @@ # -*- coding: utf8 -*- import os +import tempfile +import shutil import zipfile +# Internal dependencies +from Datatype import Datatype +from util import subtools + from mako.lookup import TemplateLookup class TrackHub(object): """docstring for TrackHub""" - def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory): + def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory): super(TrackHub, self).__init__() self.rootAssemblyHub = None + self.mySpecieFolderPath = None + self.myTracksFolderPath = None self.tool_directory = tool_directory + self.reference_genome = inputFastaFile + # TODO: Add the specie name + self.genome_name = inputFastaFile.assembly_id + self.default_pos = None + self.user_email = user_email + # TODO: Modify according to the files passed in parameter mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], output_encoding='utf-8', encoding_errors='replace') @@ -25,17 +39,21 @@ self.extra_files_path = extra_files_path self.outputFile = outputFile - inputFastaFile = open(inputFastaFile, 'r') - #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True) - # Create the structure of the Assembly Hub # TODO: Merge the following processing into a function as it is also used in twoBitCreator - baseNameFasta = os.path.basename(inputFastaFile.name) - suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) - self.twoBitName = suffixTwoBit + '.2bit' + self.twoBitName = None + self.two_bit_final_path = None + self.chromSizesFile = None + + self.default_pos = None - self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory, - extra_files_path=extra_files_path) + # Set all the missing variables of this class, and create physically the folders/files + self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path) + + # Init the Datatype + Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile, + self.extra_files_path, self.tool_directory, + self.mySpecieFolderPath, self.myTracksFolderPath) def createZip(self): for root, dirs, files in os.walk(self.rootAssemblyHub): @@ -60,124 +78,149 @@ def terminate(self): # Just a test to output a simple HTML + # TODO: Create a class to handle the file object + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')], + output_encoding='utf-8', encoding_errors='replace') + + mytemplate = mylookup.get_template('display.txt') with open(self.outputFile, 'w') as htmlOutput: - htmlOutput.write('<html>') - htmlOutput.write('<body>') - htmlOutput.write('<p>') - htmlOutput.write('The following has been generated by Hub Archive Creator:') - htmlOutput.write('</p>') - htmlOutput.write('<ul>') + # TODO: We are basically looping two times: One time with os.walk, Second time + # with the template. We could improve that if the number of files begins to be really important + list_relative_file_path = [ ] for root, dirs, files in os.walk(self.extra_files_path): for file in files: - relDir = os.path.relpath(root, self.extra_files_path) - htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), - os.path.join(relDir, file))) - htmlOutput.write('<ul>') - htmlOutput.write('</body>') - htmlOutput.write('</html>') + relative_directory = os.path.relpath(root, self.extra_files_path) + relative_file_path = os.path.join(relative_directory, file) + list_relative_file_path.append(relative_file_path) + + htmlMakoRendered = mytemplate.render( + list_relative_file_path=list_relative_file_path + ) + htmlOutput.write(htmlMakoRendered) + + def __createAssemblyHub__(self, extra_files_path): + # Get all necessaries infos first + # 2bit file creation from input fasta - def __createAssemblyHub__(self, toolDirectory, extra_files_path): + # baseNameFasta = os.path.basename(fasta_file_name) + # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) + # nameTwoBit = suffixTwoBit + '.2bit' + twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) + subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name) + + # Generate the twoBitInfo + twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) + subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name) + + # Then we get the output to generate the chromSizes + self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") + subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name) + + # We can get the biggest scaffold here, with chromSizesFile + with open(self.chromSizesFile.name, 'r') as chrom_sizes: + # TODO: Check if exists + self.default_pos = chrom_sizes.readline().split()[0] + # TODO: Manage to put every fill Function in a file dedicated for reading reasons # Create the root directory myHubPath = os.path.join(extra_files_path, "myHub") if not os.path.exists(myHubPath): os.makedirs(myHubPath) + # Create the specie folder + # TODO: Generate the name depending on the specie + mySpecieFolderPath = os.path.join(myHubPath, self.genome_name) + if not os.path.exists(mySpecieFolderPath): + os.makedirs(mySpecieFolderPath) + self.mySpecieFolderPath = mySpecieFolderPath + + # We create the 2bit file while we just created the specie folder + self.twoBitName = self.genome_name + ".2bit" + self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName) + shutil.copyfile(twoBitFile.name, self.two_bit_final_path) + # Add the genomes.txt file genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') - self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory) + self.__fillGenomesTxt__(genomesTxtFilePath) # Add the hub.txt file hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') - self.__fillHubTxt__(hubTxtFilePath, toolDirectory) + self.__fillHubTxt__(hubTxtFilePath) # Add the hub.html file # TODO: Change the name and get it depending on the specie hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') - self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory) + self.__fillHubHtmlFile__(hubHtmlFilePath) - # Create the specie folder - # TODO: Generate the name depending on the specie - mySpecieFolderPath = os.path.join(myHubPath, "dbia3") - if not os.path.exists(mySpecieFolderPath): - os.makedirs(mySpecieFolderPath) - self.mySpecieFolderPath = mySpecieFolderPath # Create the description html file in the specie folder descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') - self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory) + self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath) # Create the file groups.txt # TODO: If not inputs for this, do no create the file groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') - self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory) + self.__fillGroupsTxtFile__(groupsTxtFilePath) # Create the folder tracks into the specie folder tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") if not os.path.exists(tracksFolderPath): os.makedirs(tracksFolderPath) + self.myTracksFolderPath = tracksFolderPath return myHubPath - def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory): + def __fillGenomesTxt__(self, genomesTxtFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") - pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly') + pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly') mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(genomesTxtFilePath, 'w') as genomesTxtFile: # Write the content of the file genomes.txt - twoBitPath = os.path.join('dbia3/', self.twoBitName) + twoBitPath = os.path.join(self.genome_name, self.twoBitName) htmlMakoRendered = mytemplate.render( - genomeName="dbia3", - trackDbPath="dbia3/trackDb.txt", - groupsPath="dbia3/groups.txt", - genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold", + genomeName=self.genome_name, + trackDbPath=os.path.join(self.genome_name, "trackDb.txt"), + groupsPath=os.path.join(self.genome_name, "groups.txt"), + genomeDescription=self.genome_name, twoBitPath=twoBitPath, - organismName="Drosophilia biarmipes", - defaultPosition="contig1", + organismName=self.genome_name, + defaultPosition=self.default_pos, orderKey="4500", - scientificName="Drosophilia biarmipes", - pathAssemblyHtmlDescription="dbia3/description.html" + scientificName=self.genome_name, + pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html") ) genomesTxtFile.write(htmlMakoRendered) - def __fillHubTxt__(self, hubTxtFilePath, toolDirectory): + def __fillHubTxt__(self, hubTxtFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template('layout.txt') with open(hubTxtFilePath, 'w') as genomesTxtFile: # Write the content of the file genomes.txt htmlMakoRendered = mytemplate.render( - hubName='dbiaOnly', - shortLabel='dbia', - longLabel='This hub only contains dbia with the gene predictions', + hubName=(''.join(['gonramp', self.genome_name.title()])), + shortLabel=self.genome_name, + longLabel=self.genome_name, genomesFile='genomes.txt', - email='rmarenco@gwu.edu', + email=self.user_email, descriptionUrl='dbia.html' ) genomesTxtFile.write(htmlMakoRendered) - def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory): + def __fillHubHtmlFile__(self, hubHtmlFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file # renderer = pystache.Renderer(search_dirs="templates/hubDescription") # t = Template(templates.hubDescription.layout.html) - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(hubHtmlFilePath, 'w') as hubHtmlFile: - # Write the content of the file genomes.txt - # htmlPystached = renderer.render_name( - # "layout", - # {'specie': 'Dbia', - # 'toolUsed': 'Augustus', - # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499', - # 'genomeID': '3499', - # 'SpecieFullName': 'Drosophila biarmipes'}) htmlMakoRendered = mytemplate.render( specie='Dbia', toolUsed='Augustus', @@ -185,13 +228,12 @@ genomeID='3499', specieFullName='Drosophila biarmipes' ) - # hubHtmlFile.write(htmlPystached) - hubHtmlFile.write(htmlMakoRendered) + #hubHtmlFile.write(htmlMakoRendered) - def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory): + def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: @@ -199,11 +241,11 @@ htmlMakoRendered = mytemplate.render( specieDescription='This is the description of the dbia', ) - descriptionHtmlFile.write(htmlMakoRendered) + #descriptionHtmlFile.write(htmlMakoRendered) - def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory): + def __fillGroupsTxtFile__(self, groupsTxtFilePath): # TODO: Reenable this function at some point - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(groupsTxtFilePath, 'w') as groupsTxtFile: