# HG changeset patch # User rmarenco # Date 1469095131 14400 # Node ID acc233161f507fc697d749279d6a1b249bc25c53 # Parent 4f9847539a28d8bc7269536292b15a3ae00bbab8 planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d diff -r 4f9847539a28 -r acc233161f50 Bam.py --- a/Bam.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Bam.py Thu Jul 21 05:58:51 2016 -0400 @@ -15,12 +15,8 @@ class Bam( Datatype ): - def __init__( self, input_bam_false_path, data_bam , - inputFastaFile, extra_files_path, tool_directory ): - super(Bam, self).__init__( input_fasta_file=inputFastaFile, - extra_files_path=extra_files_path, - tool_directory=tool_directory, - ) + def __init__(self, input_bam_false_path, data_bam): + super(Bam, self).__init__() self.track = None diff -r 4f9847539a28 -r acc233161f50 Bam.pyc Binary file Bam.pyc has changed diff -r 4f9847539a28 -r acc233161f50 Bed.py --- a/Bed.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Bed.py Thu Jul 21 05:58:51 2016 -0400 @@ -11,19 +11,14 @@ class Bed( Datatype ): - def __init__( self, inputBedGeneric, data_bed_generic, - inputFastaFile, extra_files_path, tool_directory ): - super(Bed, self).__init__( - inputFastaFile, extra_files_path, tool_directory - ) + def __init__( self, inputBedGeneric, data_bed_generic): + super(Bed, self).__init__() self.track = None self.inputBedGeneric = inputBedGeneric self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) self.data_bed_generic = data_bed_generic self.name_bed_generic = self.data_bed_generic["name"] @@ -32,15 +27,6 @@ # Sort processing subtools.sort(self.inputBedGeneric, self.sortedBedFile.name) - # Generate the chrom.sizes - # TODO: Isolate in a function - # We first get the twoBit Infos - subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name) - - # Then we get the output to inject into the sort - # TODO: Check if no errors - subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( ( self.name_bed_generic, ".bb") ) diff -r 4f9847539a28 -r acc233161f50 Bed.pyc Binary file Bed.pyc has changed diff -r 4f9847539a28 -r acc233161f50 BedSimpleRepeats.py --- a/BedSimpleRepeats.py Wed Jul 20 12:29:08 2016 -0400 +++ b/BedSimpleRepeats.py Thu Jul 21 05:58:51 2016 -0400 @@ -10,40 +10,26 @@ class BedSimpleRepeats( Datatype ): - def __init__( self, input_bed_simple_repeats_false_path, data_bed_simple_repeats, - input_fasta_file, extra_files_path, tool_directory ): + def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats): - super(BedSimpleRepeats, self).__init__( - input_fasta_file, extra_files_path, tool_directory - ) + super(BedSimpleRepeats, self).__init__() self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path self.name_bed_simple_repeats = data_bed_simple_repeats["name"] self.priority = data_bed_simple_repeats["order_index"] sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") # Sort processing subtools.sort(self.input_bed_simple_repeats_false_path, sortedBedFile.name) - # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf - # Generate the chrom.sizes - - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to inject into the sort - # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + .bb trackName = "".join( ( self.name_bed_simple_repeats, '.bb' ) ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) auto_sql_option = "%s%s" % ('-as=', os.path.join(self.tool_directory, 'trf_simpleRepeat.as')) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name, + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name, typeOption='-type=bed4+12', autoSql=auto_sql_option) diff -r 4f9847539a28 -r acc233161f50 BedSimpleRepeats.pyc Binary file BedSimpleRepeats.pyc has changed diff -r 4f9847539a28 -r acc233161f50 BigWig.py --- a/BigWig.py Wed Jul 20 12:29:08 2016 -0400 +++ b/BigWig.py Thu Jul 21 05:58:51 2016 -0400 @@ -10,11 +10,8 @@ class BigWig( Datatype ): - def __init__(self, input_bigwig_path, data_bigwig, - input_fasta_path, extra_files_path, tool_directory): - super(BigWig, self).__init__( - input_fasta_path, extra_files_path, tool_directory - ) + def __init__(self, input_bigwig_path, data_bigwig): + super(BigWig, self).__init__() self.track = None diff -r 4f9847539a28 -r acc233161f50 BigWig.pyc Binary file BigWig.pyc has changed diff -r 4f9847539a28 -r acc233161f50 Datatype.py --- a/Datatype.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Datatype.py Thu Jul 21 05:58:51 2016 -0400 @@ -6,6 +6,7 @@ """ import os +import tempfile from util import subtools @@ -14,32 +15,54 @@ twoBitFile = None - def __init__( self, input_fasta_file, extra_files_path, tool_directory ): + input_fasta_file = None + extra_files_path = None + tool_directory = None - self.input_fasta_file = input_fasta_file - self.extra_files_path = extra_files_path - self.tool_directory = tool_directory + mySpecieFolderPath = None + myTrackFolderPath = None + + twoBitFile = None + chromSizesFile = None - self.twoBitFile = None + def __init__(self): - # Construction of the arborescence - # TODO: Change the hard-coded path with a input based one - self.mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3") + not_init_message = "The {0} is not initialized." \ + "Did you use pre_init static method first?" + if Datatype.input_fasta_file is None: + raise TypeError(not_init_message.format('reference genome')) + if Datatype.extra_files_path is None: + raise TypeError(not_init_message.format('track Hub path')) + if Datatype.tool_directory is None: + raise TypeError(not_init_message.format('tool directory')) + - # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object - self.myTrackFolderPath = os.path.join(self.mySpecieFolderPath, "tracks") + @staticmethod + def pre_init(reference_genome, two_bit_path, chrom_sizes_file, + extra_files_path, tool_directory, specie_folder, tracks_folder): + Datatype.extra_files_path = extra_files_path + Datatype.tool_directory = tool_directory - # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator? + # TODO: All this should be in TrackHub and not in Datatype + Datatype.mySpecieFolderPath = specie_folder + Datatype.myTrackFolderPath = tracks_folder + + Datatype.input_fasta_file = reference_genome + # 2bit file creation from input fasta - if not Datatype.twoBitFile: - print "We create the self.twoBit in " + self.__class__.__name__ - Datatype.twoBitFile = subtools.faToTwoBit(self.input_fasta_file, self.mySpecieFolderPath) + Datatype.twoBitFile = two_bit_path + Datatype.chromSizesFile = chrom_sizes_file - # TODO: Remove this by saying to all children classes to use "Datatype.twoBitFile" instead - self.twoBitFile = Datatype.twoBitFile + @staticmethod + def get_largest_scaffold_name(self): + # We can get the biggest scaffold here, with chromSizesFile + with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes: + # TODO: Check if exists + return chrom_sizes.readline().split()[0] + # TODO: Rename for PEP8 def getShortName( self, name_to_shortify ): # Slice to get from Long label the short label short_label_slice = slice(0, 15) - return name_to_shortify[short_label_slice] \ No newline at end of file + return name_to_shortify[short_label_slice] diff -r 4f9847539a28 -r acc233161f50 Datatype.pyc Binary file Datatype.pyc has changed diff -r 4f9847539a28 -r acc233161f50 Gff3.py --- a/Gff3.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Gff3.py Thu Jul 21 05:58:51 2016 -0400 @@ -11,11 +11,8 @@ class Gff3( Datatype ): - def __init__( self, input_Gff3_false_path, data_gff3, - input_fasta_false_path, extra_files_path, tool_directory ): - super( Gff3, self ).__init__( - input_fasta_false_path, extra_files_path, tool_directory - ) + def __init__(self, input_Gff3_false_path, data_gff3): + super( Gff3, self ).__init__() self.track = None @@ -29,8 +26,6 @@ sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py) - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") # gff3ToGenePred processing subtools.gff3ToGenePred(self.input_Gff3_false_path, genePredFile.name) @@ -42,19 +37,14 @@ # Sort processing subtools.sort(unsortedBedFile.name, sortedBedFile.name) - # Generate the twoBitInfo - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to generate the chromSizes # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( (self.name_gff3, ".bb" ) ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object dataURL = "tracks/%s" % trackName diff -r 4f9847539a28 -r acc233161f50 Gff3.pyc Binary file Gff3.pyc has changed diff -r 4f9847539a28 -r acc233161f50 Gtf.py --- a/Gtf.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Gtf.py Thu Jul 21 05:58:51 2016 -0400 @@ -11,11 +11,9 @@ class Gtf( Datatype ): - def __init__( self, input_gtf_false_path, data_gtf, - input_fasta_file, extra_files_path, tool_directory ): - super(Gtf, self).__init__( input_fasta_file=input_fasta_file, - extra_files_path=extra_files_path, - tool_directory=tool_directory ) + def __init__( self, input_gtf_false_path, data_gtf): + + super(Gtf, self).__init__() self.track = None @@ -30,9 +28,6 @@ unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed") sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - # GtfToGenePred subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name) @@ -43,20 +38,12 @@ # Sort processing subtools.sort(unsortedBedFile.name, sortedBedFile.name) - # TODO: Chehck if the twoBitInfo / ChromSizes is redundant and make an intermediate class - # Generate the twoBitInfo - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to generate the chromSizes - # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( ( self.name_gtf, ".bb") ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object dataURL = "tracks/%s" % trackName diff -r 4f9847539a28 -r acc233161f50 Gtf.pyc Binary file Gtf.pyc has changed diff -r 4f9847539a28 -r acc233161f50 TrackHub.py --- a/TrackHub.py Wed Jul 20 12:29:08 2016 -0400 +++ b/TrackHub.py Thu Jul 21 05:58:51 2016 -0400 @@ -2,21 +2,35 @@ # -*- coding: utf8 -*- import os +import tempfile +import shutil import zipfile +# Internal dependencies +from Datatype import Datatype +from util import subtools + from mako.lookup import TemplateLookup class TrackHub(object): """docstring for TrackHub""" - def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory): + def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory): super(TrackHub, self).__init__() self.rootAssemblyHub = None + self.mySpecieFolderPath = None + self.myTracksFolderPath = None self.tool_directory = tool_directory + self.reference_genome = inputFastaFile + # TODO: Add the specie name + self.genome_name = inputFastaFile.assembly_id + self.default_pos = None + self.user_email = user_email + # TODO: Modify according to the files passed in parameter mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], output_encoding='utf-8', encoding_errors='replace') @@ -25,17 +39,21 @@ self.extra_files_path = extra_files_path self.outputFile = outputFile - inputFastaFile = open(inputFastaFile, 'r') - #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True) - # Create the structure of the Assembly Hub # TODO: Merge the following processing into a function as it is also used in twoBitCreator - baseNameFasta = os.path.basename(inputFastaFile.name) - suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) - self.twoBitName = suffixTwoBit + '.2bit' + self.twoBitName = None + self.two_bit_final_path = None + self.chromSizesFile = None + + self.default_pos = None - self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory, - extra_files_path=extra_files_path) + # Set all the missing variables of this class, and create physically the folders/files + self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path) + + # Init the Datatype + Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile, + self.extra_files_path, self.tool_directory, + self.mySpecieFolderPath, self.myTracksFolderPath) def createZip(self): for root, dirs, files in os.walk(self.rootAssemblyHub): @@ -60,124 +78,149 @@ def terminate(self): # Just a test to output a simple HTML + # TODO: Create a class to handle the file object + mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')], + output_encoding='utf-8', encoding_errors='replace') + + mytemplate = mylookup.get_template('display.txt') with open(self.outputFile, 'w') as htmlOutput: - htmlOutput.write('') - htmlOutput.write('') - htmlOutput.write('

') - htmlOutput.write('The following has been generated by Hub Archive Creator:') - htmlOutput.write('

') - htmlOutput.write('