Mercurial > repos > rmarenco > hubarchivecreator
diff Gtf.py @ 1:fb5e60d4d18a draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
author | rmarenco |
---|---|
date | Wed, 13 Jul 2016 13:36:37 -0400 |
parents | |
children | acc233161f50 |
line wrap: on
line diff
#!/usr/bin/python

import os
import tempfile

# Internal dependencies
from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class Gtf( Datatype ):
    """Datatype that converts a GTF input into a bigBed track.

    After construction, ``self.track`` holds the ``Track`` object that
    describes the generated ``.bb`` file.
    """

    def __init__( self, input_gtf_false_path, data_gtf,
                  input_fasta_file, extra_files_path, tool_directory ):
        """Run the GTF -> genePred -> BED -> sorted BED -> bigBed pipeline.

        :param input_gtf_false_path: path of the GTF file handed to
            ``subtools.gtfToGenePred``.
        :param data_gtf: mapping providing the ``"name"`` and
            ``"order_index"`` entries (used as track name and priority).
        :param input_fasta_file: forwarded unchanged to ``Datatype.__init__``.
        :param extra_files_path: forwarded unchanged to ``Datatype.__init__``.
        :param tool_directory: forwarded unchanged to ``Datatype.__init__``.

        NOTE(review): ``self.twoBitFile``, ``self.myTrackFolderPath`` and
        ``self.getShortName`` are not defined here; presumably they are
        provided by ``Datatype`` - confirm against that class.
        """
        super(Gtf, self).__init__( input_fasta_file=input_fasta_file,
                                   extra_files_path=extra_files_path,
                                   tool_directory=tool_directory )

        self.track = None

        self.input_gtf_false_path = input_gtf_false_path
        self.name_gtf = data_gtf["name"]
        self.priority = data_gtf["order_index"]

        # Parenthesised single-argument form: same output whether ``print`` is
        # the Python 2 statement or the Python 3 function.
        print("Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf))

        # TODO: See if we need these temporary files as part of the generated files
        # Only the ``.name`` of each temporary file is used below (the paths are
        # handed to subtools), so no Python-side buffering option is needed.
        genePredFile = tempfile.NamedTemporaryFile(suffix=".genePred")
        unsortedBedFile = tempfile.NamedTemporaryFile(suffix=".unsortedBed")
        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")

        twoBitInfoFile = tempfile.NamedTemporaryFile()
        chromSizesFile = tempfile.NamedTemporaryFile(suffix=".chrom.sizes")

        temporaryFiles = ( genePredFile, unsortedBedFile, sortedBedFile,
                           twoBitInfoFile, chromSizesFile )

        try:
            # GtfToGenePred
            subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

            # TODO: From there, refactor because common use with Gff3.py
            # genePredToBed processing
            subtools.genePredToBed(genePredFile.name, unsortedBedFile.name)

            # Sort processing
            subtools.sort(unsortedBedFile.name, sortedBedFile.name)

            # TODO: Check if the twoBitInfo / ChromSizes is redundant and make an intermediate class
            # Generate the twoBitInfo
            subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)

            # Then we get the output to generate the chromSizes
            # TODO: Check if no errors
            subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)

            # bedToBigBed processing
            # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
            trackName = "".join( ( self.name_gtf, ".bb") )
            myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
            with open(myBigBedFilePath, 'w') as bigBedFile:
                subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
        finally:
            # Close the intermediates explicitly so they are released even when
            # one of the conversion steps raises, instead of waiting for
            # garbage collection.
            for temporaryFile in temporaryFiles:
                temporaryFile.close()

        # Create the Track Object
        dataURL = "tracks/%s" % trackName

        trackDb = TrackDb(
            trackName=trackName,
            longLabel=self.name_gtf,
            shortLabel=self.getShortName( self.name_gtf ),
            trackDataURL=dataURL,
            trackType='bigBed 12 +',
            visibility='dense',
            priority=self.priority,
        )
        self.track = Track(
            trackFile=myBigBedFilePath,
            trackDb=trackDb,
        )

        print("- %s created in %s" % (trackName, myBigBedFilePath))