Mercurial > repos > rmarenco > hub_archive_creator
view hub-archive-creator-1.6/hubArchiveCreator.py @ 0:163b2de763ea draft
Upload the full hubArchiveCreator archive
author | rmarenco |
---|---|
date | Tue, 01 Mar 2016 19:43:25 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/python
"""
This Galaxy tool prepares your files so they are ready for
Assembly Hub visualization.

Program test arguments:
hubArchiveCreator.py -g test_data/augustusDbia3.gff3 -f test_data/dbia3.fa -d . -o output.html
"""

import sys
import tempfile
import getopt
import zipfile
import subprocess
import os
import argparse

from mako.template import Template
from mako.lookup import TemplateLookup

# Internal dependencies
from twoBitCreator import twoBitFileCreator

# TODO: REMOVE THIS FROM BEING A GLOBAL VARIABLE
# Folder holding the "tools" and "templates" sub-folders (option -d).
toolDirectory = '.'
# Folder in which the hub tree and myHub.zip are written (option -e).
extra_files_path = '.'


def main(argv):
    """
    Build the Assembly Hub, zip it and write the HTML index of generated files.

    argv is the full argument vector, program name first (i.e. sys.argv).
    When argv is empty or None, argparse falls back to sys.argv.

    Raises subprocess.CalledProcessError when one of the external tools
    exits with a non-zero status. Always ends with sys.exit(0) on success.
    """
    global toolDirectory
    global extra_files_path

    args = _parseArguments(argv)

    if args.directory:
        toolDirectory = args.directory
    if args.extra_files_path:
        extra_files_path = args.extra_files_path

    # The inputs are kept as file objects because twoBitFileCreator receives
    # the fasta file object itself; both are closed when the block ends.
    with open(args.gff3, 'r') as inputGFF3File, \
            open(args.fasta, 'r') as inputFastaFile:
        outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w')
        try:
            # Create the structure of the Assembly Hub
            # TODO: Merge the following processing into a function as it is also used in twoBitCreator
            baseNameFasta = os.path.basename(inputFastaFile.name)
            nameTwoBit = os.path.splitext(baseNameFasta)[0] + '.2bit'

            rootAssemblyHub = createAssemblyHub(outputZip, twoBitName=nameTwoBit)

            _buildBigBedTrack(inputGFF3File, inputFastaFile)

            createZip(outputZip, rootAssemblyHub)
        finally:
            # Close the archive even when a step failed, so the handle is released.
            outputZip.close()

    # Just a test to output a simple HTML
    _writeHtmlIndex(args.output, extra_files_path)

    sys.exit(0)


def _parseArguments(argv):
    """Parse the command line held in argv (program name first) and return the namespace."""
    parser = argparse.ArgumentParser(
        description='Prepare an Assembly Hub archive from a GFF3 file and a fasta file.')
    parser.add_argument('-g', '--gff3', required=True,
                        help='GFF3 file to convert into a bigBed track')
    parser.add_argument('-f', '--fasta', required=True,
                        help='Fasta file of the genome, converted into a 2bit file')
    parser.add_argument('-d', '--directory',
                        help='Directory containing the "tools" and "templates" folders '
                             '(default: current directory)')
    parser.add_argument('-e', '--extra_files_path',
                        help='Directory where the hub and myHub.zip are written '
                             '(default: current directory)')
    parser.add_argument('-o', '--output', required=True,
                        help='HTML file listing the generated files')
    # argv[0] is the program name; passing None makes argparse read sys.argv itself.
    return parser.parse_args(argv[1:] if argv else None)


def _toolPath(toolName):
    """Return the path of the bundled executable toolName inside toolDirectory."""
    return os.path.join(toolDirectory, 'tools/' + toolName)


def _runTool(command):
    """Run command (an argument list), wait for it, and raise CalledProcessError on failure."""
    subprocess.check_call(command)


def _buildBigBedTrack(inputGFF3File, inputFastaFile):
    """
    Convert the GFF3 input into tracks/augustusDbia3.bb inside the hub.

    Pipeline: gff3ToGenePred -> genePredToBed -> sort -> bedToBigBed, with
    the chrom.sizes needed by bedToBigBed derived from the 2bit file that
    twoBitFileCreator builds from the fasta input.
    """
    # TODO: Find the best to get this path without hardcoding it
    mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")
    myTrackFolderPath = os.path.join(mySpecieFolderPath, "tracks")
    # TODO: Change the name of the bb, to tool + genome + .bb
    myBigBedFilePath = os.path.join(myTrackFolderPath, 'augustusDbia3.bb')

    # TODO: See if we need these temporary files as part of the generated files
    # Only twoBitInfoFile is written from Python (and flushed explicitly below);
    # the others are handed to external tools by name.
    genePredFile = tempfile.NamedTemporaryFile(suffix=".genePred")
    unsortedBedFile = tempfile.NamedTemporaryFile(suffix=".unsortedBed")
    sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
    twoBitInfoFile = tempfile.NamedTemporaryFile()
    chromSizesFile = tempfile.NamedTemporaryFile(suffix=".chrom.sizes")
    temporaryFiles = [genePredFile, unsortedBedFile, sortedBedFile,
                      twoBitInfoFile, chromSizesFile]

    try:
        # Each step needs the complete output of the previous one, so every
        # tool is run to completion before the next starts.
        _runTool([_toolPath('gff3ToGenePred'), inputGFF3File.name, genePredFile.name])
        _runTool([_toolPath('genePredToBed'), genePredFile.name, unsortedBedFile.name])
        _runTool(['sort', '-k1,1', '-k2,2n', unsortedBedFile.name, '-o', sortedBedFile.name])

        # 2bit file creation from input fasta
        twoBitFile = twoBitFileCreator(inputFastaFile, toolDirectory, mySpecieFolderPath)

        # Generate the chrom.sizes
        # We first get the twoBit Infos
        twoBitInfoCommand = [_toolPath('twoBitInfo'), twoBitFile.name, 'stdout']
        p = subprocess.Popen(twoBitInfoCommand,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        twoBitInfo_out, twoBitInfo_err = p.communicate()
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode, twoBitInfoCommand, twoBitInfo_err)
        twoBitInfoFile.write(twoBitInfo_out)
        # sort reads the file by name, so the data must be on disk first.
        twoBitInfoFile.flush()

        # Then we get the output to inject into the sort
        _runTool(['sort', '-k2rn', twoBitInfoFile.name, '-o', chromSizesFile.name])

        # bedToBigBed processing
        # bedToBigBed augustusDbia3.sortbed chrom.sizes augustusDbia3.bb
        # The tool creates its output itself, so only the path is passed.
        # TODO: Add the .bb file in the zip, at the right place
        _runTool([_toolPath('bedToBigBed'), sortedBedFile.name, chromSizesFile.name,
                  myBigBedFilePath])
    finally:
        # Closing a NamedTemporaryFile also deletes it.
        for temporaryFile in temporaryFiles:
            temporaryFile.close()


def _writeHtmlIndex(outputPath, rootDirectory):
    """Write to outputPath an HTML list linking every file found under rootDirectory."""
    with open(outputPath, 'w') as htmlOutput:
        htmlOutput.write('<html>')
        htmlOutput.write('<body>')
        htmlOutput.write('<p>')
        htmlOutput.write('The following generated by Hub Archive Creator:')
        htmlOutput.write('</p>')
        htmlOutput.write('<ul>')
        for root, dirs, files in os.walk(rootDirectory):
            # Get all files and get all relative links at the same time
            relDir = os.path.relpath(root, rootDirectory)
            for fileName in files:
                relPath = os.path.join(relDir, fileName)
                htmlOutput.write('<li><a href="{0}">{1}</a></li>'.format(relPath, relPath))
        htmlOutput.write('</ul>')
        htmlOutput.write('</body>')
        htmlOutput.write('</html>')


def _ensureDirectory(path):
    """Create path (and any missing parents) unless it is already a directory."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _renderTemplate(templateFolder, **values):
    """
    Render layout.txt from templateFolder (relative to toolDirectory).

    The lookup sets output_encoding='utf-8', so the result is the encoded
    output rather than text.
    """
    lookup = TemplateLookup(directories=[os.path.join(toolDirectory, templateFolder)],
                            output_encoding='utf-8',
                            encoding_errors='replace')
    return lookup.get_template('layout.txt').render(**values)


def _writeTemplate(outputPath, templateFolder, **values):
    """Render layout.txt from templateFolder with values and write it to outputPath."""
    rendered = _renderTemplate(templateFolder, **values)
    # Binary mode: the rendered output is already utf-8 encoded.
    with open(outputPath, 'wb') as outputFile:
        outputFile.write(rendered)


def createAssemblyHub(outputZip, twoBitName):
    """
    Create the myHub folder tree and its description files under extra_files_path.

    outputZip is accepted for interface compatibility and is not used here.
    twoBitName is the file name of the 2bit genome referenced by genomes.txt.
    Returns the path of the hub root folder.
    """
    # TODO: Manage to put every fill Function in a file dedicated for reading reasons
    # Create the root directory
    myHubPath = os.path.join(extra_files_path, "myHub")
    _ensureDirectory(myHubPath)

    # Hub-level files: genomes.txt, hub.txt and the hub description page
    fillGenomesTxt(os.path.join(myHubPath, 'genomes.txt'), twoBitName)
    fillHubTxt(os.path.join(myHubPath, 'hub.txt'))
    # TODO: Change the name and get it depending on the specie
    fillHubHtmlFile(os.path.join(myHubPath, 'dbia.html'))

    # Create the specie folder
    # TODO: Generate the name depending on the specie
    mySpecieFolderPath = os.path.join(myHubPath, "dbia3")
    _ensureDirectory(mySpecieFolderPath)

    # Specie-level files: trackDb.txt, description.html and groups.txt
    fillTrackDbTxtFile(os.path.join(mySpecieFolderPath, 'trackDb.txt'))
    fillDescriptionHtmlFile(os.path.join(mySpecieFolderPath, 'description.html'))
    # TODO: If not inputs for this, do no create the file
    fillGroupsTxtFile(os.path.join(mySpecieFolderPath, 'groups.txt'))

    # Create the folder tracks into the specie folder
    _ensureDirectory(os.path.join(mySpecieFolderPath, "tracks"))

    return myHubPath


def fillGenomesTxt(genomesTxtFilePath, twoBitName):
    """Write genomes.txt at genomesTxtFilePath, pointing at dbia3/<twoBitName>."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    _writeTemplate(
        genomesTxtFilePath,
        'templates/genomesAssembly',
        genomeName="dbia3",
        trackDbPath="dbia3/trackDb.txt",
        groupsPath="dbia3/groups.txt",
        genomeDescription="March 2013 Drosophila biarmipes unplaced genomic scaffold",
        twoBitPath=os.path.join('dbia3/', twoBitName),
        organismName="Drosophila biarmipes",
        defaultPosition="contig1",
        orderKey="4500",
        scientificName="Drosophila biarmipes",
        pathAssemblyHtmlDescription="dbia3/description.html"
    )


def fillHubTxt(hubTxtFilePath):
    """Write hub.txt at hubTxtFilePath."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    _writeTemplate(
        hubTxtFilePath,
        'templates/hubTxt',
        hubName='dbiaOnly',
        shortLabel='dbia',
        longLabel='This hub only contains dbia with the gene predictions',
        genomesFile='genomes.txt',
        email='rmarenco@gwu.edu',
        descriptionUrl='dbia.html'
    )


def fillHubHtmlFile(hubHtmlFilePath):
    """Write the hub description page at hubHtmlFilePath."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    _writeTemplate(
        hubHtmlFilePath,
        'templates/hubDescription',
        specie='Dbia',
        toolUsed='Augustus',
        ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
        genomeID='3499',
        specieFullName='Drosophila biarmipes'
    )


def fillTrackDbTxtFile(trackDbTxtFilePath):
    """Write trackDb.txt at trackDbTxtFilePath."""
    # TODO: Modify according to the files passed in parameter
    # NOTE(review): trackDataURL holds a label-like value and longLabel holds
    # the .bb path; the template is not visible here, so the values are kept
    # as they were - confirm against templates/trackDb/layout.txt.
    _writeTemplate(
        trackDbTxtFilePath,
        'templates/trackDb',
        trackName='augustusTrack',
        trackDataURL='Augustus_dbia3',
        shortLabel='a_dbia',
        longLabel='tracks/augustusDbia3.bb',
        trackType='bigBed 12 +',
        visibility='dense'
    )


def fillDescriptionHtmlFile(descriptionHtmlFilePath):
    """Write the specie description page at descriptionHtmlFilePath."""
    # TODO: Think about the inputs and outputs
    # TODO: Manage the template of this file
    _writeTemplate(
        descriptionHtmlFilePath,
        'templates/specieDescription',
        specieDescription='This is the description of the dbia',
    )


def fillGroupsTxtFile(groupsTxtFilePath):
    """Create an empty groups.txt at groupsTxtFilePath (the template is rendered but not written)."""
    # NOTE(review): 'prioriy' looks like a typo of 'priority', but it must
    # match the placeholder name in templates/groupsTxt/layout.txt, which is
    # not visible here - confirm before renaming.
    _renderTemplate(
        'templates/groupsTxt',
        mapName='map',
        labelMapping='Mapping',
        prioriy='2',
        isClosed='0'
    )
    # NOTE(review): writing the rendered content was disabled in the original
    # code, presumably on purpose; the file is therefore only created empty.
    with open(groupsTxtFilePath, 'w'):
        pass


def createZip(myZip, folder):
    """
    Add every file found under folder to the open archive myZip.

    Members are stored relative to the parent of folder (e.g. myHub/hub.txt),
    so the archive does not embed the location of folder on the local disk.
    """
    parentFolder = os.path.dirname(os.path.abspath(folder))
    for root, dirs, files in os.walk(folder):
        # Get all files and construct the dir at the same time
        for fileName in files:
            filePath = os.path.join(root, fileName)
            archiveName = os.path.relpath(os.path.abspath(filePath), parentFolder)
            myZip.write(filePath, archiveName)


if __name__ == "__main__":
    main(sys.argv)