diff Gff3.py @ 1:fb5e60d4d18a draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
author rmarenco
date Wed, 13 Jul 2016 13:36:37 -0400
parents
children acc233161f50
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Gff3.py	Wed Jul 13 13:36:37 2016 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+# Internal dependencies
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class Gff3( Datatype ):
+    """Datatype that turns a GFF3 annotation file into a bigBed track.
+
+    All of the work is done in the constructor: the GFF3 input is pushed
+    through the ``subtools`` wrappers (gff3ToGenePred -> genePredToBed ->
+    sort -> bedToBigBed) and the result is exposed as ``self.track``.
+
+    NOTE(review): ``self.twoBitFile``, ``self.myTrackFolderPath`` and
+    ``self.getShortName`` are not defined in this class; presumably they are
+    provided by ``Datatype`` -- confirm against Datatype.py.
+    """
+
+    def __init__( self, input_Gff3_false_path, data_gff3,
+                  input_fasta_false_path, extra_files_path, tool_directory ):
+        """Convert the GFF3 file to bigBed and build the matching Track.
+
+        :param input_Gff3_false_path: path of the GFF3 file to convert; it is
+            handed as-is to ``subtools.gff3ToGenePred``.
+        :param data_gff3: mapping read with the keys ``"name"`` (used as the
+            track label and as the base name of the ``.bb`` file) and
+            ``"order_index"`` (used as the track priority).
+        :param input_fasta_false_path: forwarded unchanged to
+            ``Datatype.__init__``.
+        :param extra_files_path: forwarded unchanged to ``Datatype.__init__``.
+        :param tool_directory: forwarded unchanged to ``Datatype.__init__``.
+        """
+        super( Gff3, self ).__init__(
+                input_fasta_false_path, extra_files_path, tool_directory
+        )
+
+        # Filled in at the very end, once the bigBed file has been generated
+        self.track = None
+
+        self.input_Gff3_false_path = input_Gff3_false_path
+        self.name_gff3 = data_gff3["name"]
+        self.priority = data_gff3["order_index"]
+
+        # Intermediate files of the conversion chain. Only their .name is used:
+        # each subtools call below receives paths, not file objects.
+        # NOTE(review): `bufsize` is the Python 2 spelling of this keyword
+        # (Python 3 names it `buffering`), so this module is Python 2 only.
+        # NOTE(review): none of these files is closed explicitly; with the
+        # default delete=True they are only removed once the objects are
+        # closed / garbage collected.
+        # TODO: See if we need these temporary files as part of the generated files
+        genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
+        unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
+        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py)
+        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+
+        # gff3ToGenePred processing
+        subtools.gff3ToGenePred(self.input_Gff3_false_path, genePredFile.name)
+
+        # TODO: From there, refactor because common use with Gtf.py
+        # genePredToBed processing
+        subtools.genePredToBed(genePredFile.name, unsortedBedFile.name)
+
+        # Sort processing
+        subtools.sort(unsortedBedFile.name, sortedBedFile.name)
+
+        # Generate the twoBitInfo
+        # (self.twoBitFile is not created here -- presumably set up by
+        # Datatype.__init__ from the fasta input; TODO confirm)
+        subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
+
+        # Then we get the output to generate the chromSizes
+        # TODO: Check if no errors
+        subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
+
+        # bedToBigBed processing
+        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
+        trackName = "".join( (self.name_gff3, ".bb" ) )
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+        # The file is opened in 'w' mode (created/truncated) but never written
+        # through this handle: only its name is passed to bedToBigBed.
+        with open(myBigBedFilePath, 'w') as bigBedFile:
+            subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
+
+        # Create the Track Object
+        # URL of the bigBed file as recorded in the TrackDb entry
+        dataURL = "tracks/%s" % trackName
+
+        trackDb = TrackDb(
+            trackName=trackName,
+            longLabel=self.name_gff3,
+            shortLabel=self.getShortName( self.name_gff3 ),
+            trackDataURL=dataURL,
+            trackType='bigBed 12 +',
+            visibility='dense',
+            priority=self.priority,
+        )
+
+        # Pair the generated file with its TrackDb description
+        self.track = Track(
+            trackFile=myBigBedFilePath,
+            trackDb=trackDb,
+        )
+
+        print("- %s created in %s" % (trackName, myBigBedFilePath))