Mercurial > repos > rmarenco > hubarchivecreator
changeset 10:acc233161f50 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author | rmarenco |
---|---|
date | Thu, 21 Jul 2016 05:58:51 -0400 |
parents | 4f9847539a28 |
children | d05236b15f81 |
files | Bam.py Bam.pyc Bed.py Bed.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc TrackHub.py TrackHub.pyc hubArchiveCreator.py hubArchiveCreator.xml templates/display.html templates/display.txt util/Fasta.py util/Fasta.pyc util/subtools.py util/subtools.pyc |
diffstat | 23 files changed, 260 insertions(+), 202 deletions(-) [+] |
line wrap: on
line diff
--- a/Bam.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Bam.py Thu Jul 21 05:58:51 2016 -0400 @@ -15,12 +15,8 @@ class Bam( Datatype ): - def __init__( self, input_bam_false_path, data_bam , - inputFastaFile, extra_files_path, tool_directory ): - super(Bam, self).__init__( input_fasta_file=inputFastaFile, - extra_files_path=extra_files_path, - tool_directory=tool_directory, - ) + def __init__(self, input_bam_false_path, data_bam): + super(Bam, self).__init__() self.track = None
--- a/Bed.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Bed.py Thu Jul 21 05:58:51 2016 -0400 @@ -11,19 +11,14 @@ class Bed( Datatype ): - def __init__( self, inputBedGeneric, data_bed_generic, - inputFastaFile, extra_files_path, tool_directory ): - super(Bed, self).__init__( - inputFastaFile, extra_files_path, tool_directory - ) + def __init__( self, inputBedGeneric, data_bed_generic): + super(Bed, self).__init__() self.track = None self.inputBedGeneric = inputBedGeneric self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) self.data_bed_generic = data_bed_generic self.name_bed_generic = self.data_bed_generic["name"] @@ -32,15 +27,6 @@ # Sort processing subtools.sort(self.inputBedGeneric, self.sortedBedFile.name) - # Generate the chrom.sizes - # TODO: Isolate in a function - # We first get the twoBit Infos - subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name) - - # Then we get the output to inject into the sort - # TODO: Check if no errors - subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( ( self.name_bed_generic, ".bb") )
--- a/BedSimpleRepeats.py Wed Jul 20 12:29:08 2016 -0400 +++ b/BedSimpleRepeats.py Thu Jul 21 05:58:51 2016 -0400 @@ -10,40 +10,26 @@ class BedSimpleRepeats( Datatype ): - def __init__( self, input_bed_simple_repeats_false_path, data_bed_simple_repeats, - input_fasta_file, extra_files_path, tool_directory ): + def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats): - super(BedSimpleRepeats, self).__init__( - input_fasta_file, extra_files_path, tool_directory - ) + super(BedSimpleRepeats, self).__init__() self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path self.name_bed_simple_repeats = data_bed_simple_repeats["name"] self.priority = data_bed_simple_repeats["order_index"] sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") # Sort processing subtools.sort(self.input_bed_simple_repeats_false_path, sortedBedFile.name) - # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf - # Generate the chrom.sizes - - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to inject into the sort - # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + .bb trackName = "".join( ( self.name_bed_simple_repeats, '.bb' ) ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) auto_sql_option = "%s%s" % ('-as=', os.path.join(self.tool_directory, 'trf_simpleRepeat.as')) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name, + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name, typeOption='-type=bed4+12', autoSql=auto_sql_option)
--- a/BigWig.py Wed Jul 20 12:29:08 2016 -0400 +++ b/BigWig.py Thu Jul 21 05:58:51 2016 -0400 @@ -10,11 +10,8 @@ class BigWig( Datatype ): - def __init__(self, input_bigwig_path, data_bigwig, - input_fasta_path, extra_files_path, tool_directory): - super(BigWig, self).__init__( - input_fasta_path, extra_files_path, tool_directory - ) + def __init__(self, input_bigwig_path, data_bigwig): + super(BigWig, self).__init__() self.track = None
--- a/Datatype.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Datatype.py Thu Jul 21 05:58:51 2016 -0400 @@ -6,6 +6,7 @@ """ import os +import tempfile from util import subtools @@ -14,32 +15,54 @@ twoBitFile = None - def __init__( self, input_fasta_file, extra_files_path, tool_directory ): + input_fasta_file = None + extra_files_path = None + tool_directory = None - self.input_fasta_file = input_fasta_file - self.extra_files_path = extra_files_path - self.tool_directory = tool_directory + mySpecieFolderPath = None + myTrackFolderPath = None + + twoBitFile = None + chromSizesFile = None - self.twoBitFile = None + def __init__(self): - # Construction of the arborescence - # TODO: Change the hard-coded path with a input based one - self.mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3") + not_init_message = "The {0} is not initialized." \ + "Did you use pre_init static method first?" + if Datatype.input_fasta_file is None: + raise TypeError(not_init_message.format('reference genome')) + if Datatype.extra_files_path is None: + raise TypeError(not_init_message.format('track Hub path')) + if Datatype.tool_directory is None: + raise TypeError(not_init_message.format('tool directory')) + - # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object - self.myTrackFolderPath = os.path.join(self.mySpecieFolderPath, "tracks") + @staticmethod + def pre_init(reference_genome, two_bit_path, chrom_sizes_file, + extra_files_path, tool_directory, specie_folder, tracks_folder): + Datatype.extra_files_path = extra_files_path + Datatype.tool_directory = tool_directory - # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator? + # TODO: All this should be in TrackHub and not in Datatype + Datatype.mySpecieFolderPath = specie_folder + Datatype.myTrackFolderPath = tracks_folder + + Datatype.input_fasta_file = reference_genome + # 2bit file creation from input fasta - if not Datatype.twoBitFile: - print "We create the self.twoBit in " + self.__class__.__name__ - Datatype.twoBitFile = subtools.faToTwoBit(self.input_fasta_file, self.mySpecieFolderPath) + Datatype.twoBitFile = two_bit_path + Datatype.chromSizesFile = chrom_sizes_file - # TODO: Remove this by saying to all children classes to use "Datatype.twoBitFile" instead - self.twoBitFile = Datatype.twoBitFile + @staticmethod + def get_largest_scaffold_name(self): + # We can get the biggest scaffold here, with chromSizesFile + with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes: + # TODO: Check if exists + return chrom_sizes.readline().split()[0] + # TODO: Rename for PEP8 def getShortName( self, name_to_shortify ): # Slice to get from Long label the short label short_label_slice = slice(0, 15) - return name_to_shortify[short_label_slice] \ No newline at end of file + return name_to_shortify[short_label_slice]
--- a/Gff3.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Gff3.py Thu Jul 21 05:58:51 2016 -0400 @@ -11,11 +11,8 @@ class Gff3( Datatype ): - def __init__( self, input_Gff3_false_path, data_gff3, - input_fasta_false_path, extra_files_path, tool_directory ): - super( Gff3, self ).__init__( - input_fasta_false_path, extra_files_path, tool_directory - ) + def __init__(self, input_Gff3_false_path, data_gff3): + super( Gff3, self ).__init__() self.track = None @@ -29,8 +26,6 @@ sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py) - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") # gff3ToGenePred processing subtools.gff3ToGenePred(self.input_Gff3_false_path, genePredFile.name) @@ -42,19 +37,14 @@ # Sort processing subtools.sort(unsortedBedFile.name, sortedBedFile.name) - # Generate the twoBitInfo - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to generate the chromSizes # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( (self.name_gff3, ".bb" ) ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object dataURL = "tracks/%s" % trackName
--- a/Gtf.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Gtf.py Thu Jul 21 05:58:51 2016 -0400 @@ -11,11 +11,9 @@ class Gtf( Datatype ): - def __init__( self, input_gtf_false_path, data_gtf, - input_fasta_file, extra_files_path, tool_directory ): - super(Gtf, self).__init__( input_fasta_file=input_fasta_file, - extra_files_path=extra_files_path, - tool_directory=tool_directory ) + def __init__( self, input_gtf_false_path, data_gtf): + + super(Gtf, self).__init__() self.track = None @@ -30,9 +28,6 @@ unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed") sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - # GtfToGenePred subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name) @@ -43,20 +38,12 @@ # Sort processing subtools.sort(unsortedBedFile.name, sortedBedFile.name) - # TODO: Chehck if the twoBitInfo / ChromSizes is redundant and make an intermediate class - # Generate the twoBitInfo - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to generate the chromSizes - # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( ( self.name_gtf, ".bb") ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object dataURL = "tracks/%s" % trackName
--- a/TrackHub.py Wed Jul 20 12:29:08 2016 -0400 +++ b/TrackHub.py Thu Jul 21 05:58:51 2016 -0400 @@ -2,21 +2,35 @@ # -*- coding: utf8 -*- import os +import tempfile +import shutil import zipfile +# Internal dependencies +from Datatype import Datatype +from util import subtools + from mako.lookup import TemplateLookup class TrackHub(object): """docstring for TrackHub""" - def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory): + def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory): super(TrackHub, self).__init__() self.rootAssemblyHub = None + self.mySpecieFolderPath = None + self.myTracksFolderPath = None self.tool_directory = tool_directory + self.reference_genome = inputFastaFile + # TODO: Add the specie name + self.genome_name = inputFastaFile.assembly_id + self.default_pos = None + self.user_email = user_email + # TODO: Modify according to the files passed in parameter mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], output_encoding='utf-8', encoding_errors='replace') @@ -25,17 +39,21 @@ self.extra_files_path = extra_files_path self.outputFile = outputFile - inputFastaFile = open(inputFastaFile, 'r') - #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True) - # Create the structure of the Assembly Hub # TODO: Merge the following processing into a function as it is also used in twoBitCreator - baseNameFasta = os.path.basename(inputFastaFile.name) - suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) - self.twoBitName = suffixTwoBit + '.2bit' + self.twoBitName = None + self.two_bit_final_path = None + self.chromSizesFile = None + + self.default_pos = None - self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory, - extra_files_path=extra_files_path) + # Set all the missing variables of this class, and create physically the folders/files + self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path) + + # Init the Datatype + Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile, + self.extra_files_path, self.tool_directory, + self.mySpecieFolderPath, self.myTracksFolderPath) def createZip(self): for root, dirs, files in os.walk(self.rootAssemblyHub): @@ -60,124 +78,149 @@ def terminate(self): # Just a test to output a simple HTML + # TODO: Create a class to handle the file object + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')], + output_encoding='utf-8', encoding_errors='replace') + + mytemplate = mylookup.get_template('display.txt') with open(self.outputFile, 'w') as htmlOutput: - htmlOutput.write('<html>') - htmlOutput.write('<body>') - htmlOutput.write('<p>') - htmlOutput.write('The following has been generated by Hub Archive Creator:') - htmlOutput.write('</p>') - htmlOutput.write('<ul>') + # TODO: We are basically looping two times: One time with os.walk, Second time + # with the template. We could improve that if the number of files begins to be really important + list_relative_file_path = [ ] for root, dirs, files in os.walk(self.extra_files_path): for file in files: - relDir = os.path.relpath(root, self.extra_files_path) - htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), - os.path.join(relDir, file))) - htmlOutput.write('<ul>') - htmlOutput.write('</body>') - htmlOutput.write('</html>') + relative_directory = os.path.relpath(root, self.extra_files_path) + relative_file_path = os.path.join(relative_directory, file) + list_relative_file_path.append(relative_file_path) + + htmlMakoRendered = mytemplate.render( + list_relative_file_path=list_relative_file_path + ) + htmlOutput.write(htmlMakoRendered) + + def __createAssemblyHub__(self, extra_files_path): + # Get all necessaries infos first + # 2bit file creation from input fasta - def __createAssemblyHub__(self, toolDirectory, extra_files_path): + # baseNameFasta = os.path.basename(fasta_file_name) + # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) + # nameTwoBit = suffixTwoBit + '.2bit' + twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) + subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name) + + # Generate the twoBitInfo + twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) + subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name) + + # Then we get the output to generate the chromSizes + self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") + subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name) + + # We can get the biggest scaffold here, with chromSizesFile + with open(self.chromSizesFile.name, 'r') as chrom_sizes: + # TODO: Check if exists + self.default_pos = chrom_sizes.readline().split()[0] + # TODO: Manage to put every fill Function in a file dedicated for reading reasons # Create the root directory myHubPath = os.path.join(extra_files_path, "myHub") if not os.path.exists(myHubPath): os.makedirs(myHubPath) + # Create the specie folder + # TODO: Generate the name depending on the specie + mySpecieFolderPath = os.path.join(myHubPath, self.genome_name) + if not os.path.exists(mySpecieFolderPath): + os.makedirs(mySpecieFolderPath) + self.mySpecieFolderPath = mySpecieFolderPath + + # We create the 2bit file while we just created the specie folder + self.twoBitName = self.genome_name + ".2bit" + self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName) + shutil.copyfile(twoBitFile.name, self.two_bit_final_path) + # Add the genomes.txt file genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') - self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory) + self.__fillGenomesTxt__(genomesTxtFilePath) # Add the hub.txt file hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') - self.__fillHubTxt__(hubTxtFilePath, toolDirectory) + self.__fillHubTxt__(hubTxtFilePath) # Add the hub.html file # TODO: Change the name and get it depending on the specie hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') - self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory) + self.__fillHubHtmlFile__(hubHtmlFilePath) - # Create the specie folder - # TODO: Generate the name depending on the specie - mySpecieFolderPath = os.path.join(myHubPath, "dbia3") - if not os.path.exists(mySpecieFolderPath): - os.makedirs(mySpecieFolderPath) - self.mySpecieFolderPath = mySpecieFolderPath # Create the description html file in the specie folder descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') - self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory) + self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath) # Create the file groups.txt # TODO: If not inputs for this, do no create the file groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') - self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory) + self.__fillGroupsTxtFile__(groupsTxtFilePath) # Create the folder tracks into the specie folder tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") if not os.path.exists(tracksFolderPath): os.makedirs(tracksFolderPath) + self.myTracksFolderPath = tracksFolderPath return myHubPath - def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory): + def __fillGenomesTxt__(self, genomesTxtFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") - pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly') + pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly') mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(genomesTxtFilePath, 'w') as genomesTxtFile: # Write the content of the file genomes.txt - twoBitPath = os.path.join('dbia3/', self.twoBitName) + twoBitPath = os.path.join(self.genome_name, self.twoBitName) htmlMakoRendered = mytemplate.render( - genomeName="dbia3", - trackDbPath="dbia3/trackDb.txt", - groupsPath="dbia3/groups.txt", - genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold", + genomeName=self.genome_name, + trackDbPath=os.path.join(self.genome_name, "trackDb.txt"), + groupsPath=os.path.join(self.genome_name, "groups.txt"), + genomeDescription=self.genome_name, twoBitPath=twoBitPath, - organismName="Drosophilia biarmipes", - defaultPosition="contig1", + organismName=self.genome_name, + defaultPosition=self.default_pos, orderKey="4500", - scientificName="Drosophilia biarmipes", - pathAssemblyHtmlDescription="dbia3/description.html" + scientificName=self.genome_name, + pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html") ) genomesTxtFile.write(htmlMakoRendered) - def __fillHubTxt__(self, hubTxtFilePath, toolDirectory): + def __fillHubTxt__(self, hubTxtFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template('layout.txt') with open(hubTxtFilePath, 'w') as genomesTxtFile: # Write the content of the file genomes.txt htmlMakoRendered = mytemplate.render( - hubName='dbiaOnly', - shortLabel='dbia', - longLabel='This hub only contains dbia with the gene predictions', + hubName=(''.join(['gonramp', self.genome_name.title()])), + shortLabel=self.genome_name, + longLabel=self.genome_name, genomesFile='genomes.txt', - email='rmarenco@gwu.edu', + email=self.user_email, descriptionUrl='dbia.html' ) genomesTxtFile.write(htmlMakoRendered) - def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory): + def __fillHubHtmlFile__(self, hubHtmlFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file # renderer = pystache.Renderer(search_dirs="templates/hubDescription") # t = Template(templates.hubDescription.layout.html) - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(hubHtmlFilePath, 'w') as hubHtmlFile: - # Write the content of the file genomes.txt - # htmlPystached = renderer.render_name( - # "layout", - # {'specie': 'Dbia', - # 'toolUsed': 'Augustus', - # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499', - # 'genomeID': '3499', - # 'SpecieFullName': 'Drosophila biarmipes'}) htmlMakoRendered = mytemplate.render( specie='Dbia', toolUsed='Augustus', @@ -185,13 +228,12 @@ genomeID='3499', specieFullName='Drosophila biarmipes' ) - # hubHtmlFile.write(htmlPystached) - hubHtmlFile.write(htmlMakoRendered) + #hubHtmlFile.write(htmlMakoRendered) - def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory): + def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath): # TODO: Think about the inputs and outputs # TODO: Manage the template of this file - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: @@ -199,11 +241,11 @@ htmlMakoRendered = mytemplate.render( specieDescription='This is the description of the dbia', ) - descriptionHtmlFile.write(htmlMakoRendered) + #descriptionHtmlFile.write(htmlMakoRendered) - def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory): + def __fillGroupsTxtFile__(self, groupsTxtFilePath): # TODO: Reenable this function at some point - mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')], output_encoding='utf-8', encoding_errors='replace') mytemplate = mylookup.get_template("layout.txt") with open(groupsTxtFilePath, 'w') as groupsTxtFile:
--- a/hubArchiveCreator.py Wed Jul 20 12:29:08 2016 -0400 +++ b/hubArchiveCreator.py Thu Jul 21 05:58:51 2016 -0400 @@ -14,13 +14,14 @@ import sys # Internal dependencies -from TrackHub import TrackHub -from Gff3 import Gff3 from Bam import Bam from BedSimpleRepeats import BedSimpleRepeats from Bed import Bed from BigWig import BigWig +from util.Fasta import Fasta +from Gff3 import Gff3 from Gtf import Gtf +from TrackHub import TrackHub # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -62,6 +63,10 @@ parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') + parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation') + + parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID') + ucsc_tools_path = '' toolDirectory = '.' @@ -70,11 +75,20 @@ # Get the args passed in parameter args = parser.parse_args() - input_fasta_file = args.fasta + array_inputs_reference_genome = json.loads(args.fasta) + + # TODO: Replace these with the object Fasta + input_fasta_file = array_inputs_reference_genome["false_path"] + input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) + genome_name = sanitize_name_input(args.genome_name) + + reference_genome = Fasta(array_inputs_reference_genome["false_path"], + input_fasta_file_name, genome_name) + + user_email = args.user_email # TODO: Add array for each input because we can add multiple -b for example + filter the data associated - array_inputs_gff3 = args.gff3 array_inputs_bed_simple_repeats = args.bedSimpleRepeats array_inputs_bed_generic = args.bed @@ -96,42 +110,38 @@ if args.extra_files_path: extra_files_path = args.extra_files_path - # TODO: Check here all the binaries / tools we need. Exception is missing + # TODO: Check here all the binaries / tools we need. Exception if missing # Create the Track Hub folder - trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory) + trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) all_datatype_dictionary = {} + datatype_parameters = (inputs_data, all_datatype_dictionary) + # Process Augustus if array_inputs_gff3: - create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) - # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig + # Process Bed simple repeats if array_inputs_bed_simple_repeats: - create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters) - # Process a Bed => tBlastN or TopHat + # Process Bed if array_inputs_bed_generic: - create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters) - # Process a GTF => Tophat + # Process GTF if array_inputs_gtf: - create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters) - # Process a Bam => Tophat + # Process Bam if array_inputs_bam: - create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters) - # Process a BigWig => From Bam + # Process BigWig if array_inputs_bigwig: - create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters) # Create Ordered Dictionary to add the tracks in the tool form order all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) @@ -147,6 +157,10 @@ sys.exit(0) +def sanitize_name_input(string_to_sanitize): + return string_to_sanitize \ + .replace("/", "_") \ + .replace(" ", "_") def sanitize_name_inputs(inputs_data): """ @@ -156,22 +170,16 @@ :return: """ for key in inputs_data: - inputs_data[key]["name"] = inputs_data[key]["name"]\ - .replace("/", "_")\ - .replace(" ", "_") + inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) -def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, tool_directory): +def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): """ Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub and update the dictionary of datatype :param ExtensionClass: T <= Datatype :param array_inputs: list[string] :param inputs_data: - :param input_fasta_file: string - :param extra_files_path: string - :param tool_directory; string """ datatype_dictionary = {} @@ -180,8 +188,8 @@ for input_false_path in array_inputs: for key, data_value in inputs_data.items(): if key == input_false_path: - extensionObject = ExtensionClass(input_false_path, data_value, - input_fasta_file, extra_files_path, tool_directory) + extensionObject = ExtensionClass(input_false_path, data_value) + datatype_dictionary.update({data_value["order_index"]: extensionObject}) all_datatype_dictionary.update(datatype_dictionary)
--- a/hubArchiveCreator.xml Wed Jul 20 12:29:08 2016 -0400 +++ b/hubArchiveCreator.xml Thu Jul 21 05:58:51 2016 -0400 @@ -31,6 +31,9 @@ mkdir -p $output.extra_files_path; python $__tool_directory__/hubArchiveCreator.py + ## Ask the user to enter the genome name + --genome_name '$genome_name' + #import json #set global data_parameter_dict = {} @@ -81,19 +84,32 @@ #end if #end for + ## We combine the fasta file dataset name with his false path in a JSON object + #set fasta_json = json.dumps({"false_path": str($fasta_file), "name": $fasta_file.name}) + -f '$fasta_json' + ## Dump the final json #set all_data_json = json.dumps($data_parameter_dict) - -f $Fasta_File --data_json '$all_data_json' + ## Retrieve the user email + --user_email $__user_email__ + -d $__tool_directory__ -e $output.files_path -o $output; ]]></command> <inputs> <param + name="genome_name" + type="text" + size="30" + value="unknown" + label="UCSC Genome Browser assembly ID" + /> + <param format="fasta" - name="Fasta_File" + name="fasta_file" type="data" label="Reference genome" /> @@ -175,7 +191,7 @@ <!-- Can also use assert_command to test command --> <!-- Testing GFF3 input --> <test> - <param name="Fasta_File" value="dbia3.fa"/> + <param name="fasta_file" value="dbia3.fa"/> <repeat name="format"> <conditional name="formatChoice"> <param name="format_select" value="gff3"/> @@ -209,7 +225,7 @@ </output> </test> <test> - <param name="Fasta_File" value="dbia3.fa"/> + <param name="fasta_file" value="dbia3.fa"/> <param name="GFF3" value="augustusDbia3.gff3"/> <output name="output" file="augustusOutput.html" lines_diff="2"> <extra_files type="directory" value="myHub"/>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/display.txt Thu Jul 21 05:58:51 2016 -0400 @@ -0,0 +1,15 @@ +<%namespace name="os" module="os"/> +<html> + <body> + <p> + The following has been generated by Hub Archive Creator: + </p> + <ul> + % for relative_file_path in list_relative_file_path: + <li> + <a href="${relative_file_path}">${relative_file_path}</a> + </li> + % endfor + </ul> + </body> +</html> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/Fasta.py Thu Jul 21 05:58:51 2016 -0400 @@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +""" +Class describing the Fasta format +(As of the 07/20/2016, only used with the reference genome) +""" + +class Fasta(object): + def __init__(self, false_path, name, assembly_id): + self.false_path = false_path + self.name = name + + if not assembly_id: + assembly_id = "unknown" + self.assembly_id = assembly_id \ No newline at end of file
--- a/util/subtools.py Wed Jul 20 12:29:08 2016 -0400 +++ b/util/subtools.py Thu Jul 21 05:58:51 2016 -0400 @@ -39,20 +39,16 @@ return p -def faToTwoBit(fasta_file_name, mySpecieFolder): +def faToTwoBit(fasta_file_name, twoBitFile): """ This function call faToTwoBit UCSC tool, and return the twoBitFile :param fasta_file_name: :param mySpecieFolder: :return: """ - baseNameFasta = os.path.basename(fasta_file_name) - suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) - nameTwoBit = suffixTwoBit + '.2bit' - with open(os.path.join(mySpecieFolder, nameTwoBit), 'w') as twoBitFile: - array_call = ['faToTwoBit', fasta_file_name, twoBitFile.name] - _handleExceptionAndCheckCall(array_call) + array_call = ['faToTwoBit', fasta_file_name, twoBitFile] + _handleExceptionAndCheckCall(array_call) return twoBitFile