changeset 11:d05236b15f81 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3760d0c8353b924ecf994131a5c2eb381aa81fb2
author rmarenco
date Wed, 27 Jul 2016 10:10:49 -0400
parents acc233161f50
children 747475757cb0
files Bam.py Bam.pyc Bed.py Bed.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc TrackHub.py TrackHub.pyc hubArchiveCreator.py hubArchiveCreator.xml util/Fasta.pyc util/subtools.pyc
diffstat 20 files changed, 222 insertions(+), 143 deletions(-) [+]
line wrap: on
line diff
--- a/Bam.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/Bam.py	Wed Jul 27 10:10:49 2016 -0400
@@ -24,11 +24,15 @@
 
         self.data_bam = data_bam
         # TODO: Check if it already contains the .bam extension / Do a function in Datatype which check the extension
-        self.name_bam = self.data_bam["name"] + ".bam"
+        if not self.data_bam["name"].endswith(".bam"):  # suffix test: ".bam" elsewhere in the name is not an extension
+            self.name_bam = self.data_bam["name"] + ".bam"
+        else:
+            self.name_bam = self.data_bam["name"]
+
         self.priority = self.data_bam["order_index"]
         self.index_bam = self.data_bam["index"]
 
-        print "Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam)
+        #print "Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam)
 
         # First: Add the bam file
         # Second: Add the bam index file, in the same folder (https://genome.ucsc.edu/goldenpath/help/bam.html)
@@ -42,23 +46,29 @@
         shutil.copyfile(self.index_bam, bam_index_file_path)
 
         # Create the Track Object
-        dataURL = "tracks/%s" % self.name_bam
+        self.createTrack(file_path=self.name_bam,
+                         track_name=self.name_bam,
+                         long_label=self.name_bam, track_type='bam', visibility='pack', priority=self.priority,
+                         track_file=bam_index_file_path)
+        #
+        # dataURL = "tracks/%s" % self.name_bam
+        #
+        # trackDb = TrackDb(
+        #     trackName=self.name_bam,
+        #     longLabel=self.name_bam,
+        #     shortLabel=self.getShortName( self.name_bam ),
+        #     trackDataURL=dataURL,
+        #     trackType='bam',
+        #     visibility='pack',
+        #     priority=self.priority,
+        # )
+        #
+        # # Return the Bam Track Object
+        # self.track = Track(
+        #     trackFile=bam_index_file_path,
+        #     trackDb=trackDb,
+        # )
 
-        trackDb = TrackDb(
-            trackName=self.name_bam,
-            longLabel=self.name_bam,
-            shortLabel=self.getShortName( self.name_bam ),
-            trackDataURL=dataURL,
-            trackType='bam',
-            visibility='pack',
-            priority=self.priority,
-        )
-
-        # Return the Bam Track Object
-        self.track = Track(
-            trackFile=bam_index_file_path,
-            trackDb=trackDb,
-        )
-
-        print("- %s created in %s" % (self.name_bam, bam_file_path))
-        print("- %s created in %s" % (self.index_bam, bam_index_file_path))
+        print("- Bam %s created" % self.name_bam)
+        #print("- %s created in %s" % (self.name_bam, bam_file_path))
+        #print("- %s created in %s" % (self.index_bam, bam_index_file_path))
Binary file Bam.pyc has changed
--- a/Bed.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/Bed.py	Wed Jul 27 10:10:49 2016 -0400
@@ -36,23 +36,30 @@
             subtools.bedToBigBed(self.sortedBedFile.name, self.chromSizesFile.name, self.bigBedFile.name)
 
         # Create the Track Object
-        dataURL = "tracks/%s" % trackName
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_bed_generic, track_type='bigBed', visibility='dense',
+                         priority=self.priority, thick_draw_item='on',  # keep thickDrawItem='on' as before the refactoring
+                         track_file=myBigBedFilePath)
 
-        trackDb = TrackDb(
-            trackName=trackName,
-            longLabel=self.name_bed_generic,
-            shortLabel=self.getShortName(self.name_bed_generic),
-            trackDataURL=dataURL,
-            trackType='bigBed',
-            visibility='dense',
-            thickDrawItem='on',
-            priority=self.priority,
-        )
+        # dataURL = "tracks/%s" % trackName
+        #
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_bed_generic,
+        #     shortLabel=self.getShortName(self.name_bed_generic),
+        #     trackDataURL=dataURL,
+        #     trackType='bigBed',
+        #     visibility='dense',
+        #     thickDrawItem='on',
+        #     priority=self.priority,
+        # )
+        #
+        # # Return the BigBed track
+        # self.track = Track(
+        #     trackFile=myBigBedFilePath,
+        #     trackDb=trackDb,
+        # )
 
-        # Return the BigBed track
-        self.track = Track(
-            trackFile=myBigBedFilePath,
-            trackDb=trackDb,
-        )
-
-        print("- %s created in %s" % (trackName, myBigBedFilePath))
+        print("- Bed %s created" % self.name_bed_generic)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
Binary file Bed.pyc has changed
--- a/BedSimpleRepeats.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/BedSimpleRepeats.py	Wed Jul 27 10:10:49 2016 -0400
@@ -34,21 +34,28 @@
                                  autoSql=auto_sql_option)
 
         # Create the Track Object
-        dataURL = "tracks/%s" % trackName
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_bed_simple_repeats, track_type='bigBed 4 +', visibility='dense',
+                         priority=self.priority,
+                         track_file=myBigBedFilePath)
 
-        trackDb = TrackDb(
-            trackName=trackName,
-            longLabel=self.name_bed_simple_repeats,
-            shortLabel=self.getShortName( self.name_bed_simple_repeats ),
-            trackDataURL=dataURL,
-            trackType='bigBed 4 +',
-            visibility='dense',
-            priority=self.priority,
-        )
+        # dataURL = "tracks/%s" % trackName
+        #
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_bed_simple_repeats,
+        #     shortLabel=self.getShortName( self.name_bed_simple_repeats ),
+        #     trackDataURL=dataURL,
+        #     trackType='bigBed 4 +',
+        #     visibility='dense',
+        #     priority=self.priority,
+        # )
+        #
+        # self.track = Track(
+        #     trackFile=myBigBedFilePath,
+        #     trackDb=trackDb,
+        # )
 
-        self.track = Track(
-            trackFile=myBigBedFilePath,
-            trackDb=trackDb,
-        )
-
-        print("- %s created in %s" % (trackName, myBigBedFilePath))
+        print("- Bed simple repeats %s created" % self.name_bed_simple_repeats)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
Binary file BedSimpleRepeats.pyc has changed
--- a/BigWig.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/BigWig.py	Wed Jul 27 10:10:49 2016 -0400
@@ -19,7 +19,7 @@
         self.name_bigwig = data_bigwig["name"]
         self.priority = data_bigwig["order_index"]
 
-        print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig )
+        #print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig )
 
         trackName = "".join( ( self.name_bigwig, ".bigwig" ) )
 
@@ -27,22 +27,30 @@
         shutil.copy(self.input_bigwig_path, myBigWigFilePath)
 
         # Create the Track Object
-        dataURL = "tracks/%s" % trackName
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_bigwig, track_type='bigWig', visibility='full',  # same spelling as before the refactoring
+                         priority=self.priority,
+                         track_file=myBigWigFilePath)
 
-        # Return the BigBed track
-        trackDb = TrackDb(
-            trackName=trackName,
-            longLabel=self.name_bigwig,
-            shortLabel=self.getShortName( self.name_bigwig ),
-            trackDataURL=dataURL,
-            trackType='bigWig',
-            visibility='full',
-            priority=self.priority,
-        )
+        # dataURL = "tracks/%s" % trackName
+        #
+        # # Return the BigBed track
+        #
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_bigwig,
+        #     shortLabel=self.getShortName( self.name_bigwig ),
+        #     trackDataURL=dataURL,
+        #     trackType='bigWig',
+        #     visibility='full',
+        #     priority=self.priority,
+        # )
+        #
+        # self.track = Track(
+        #     trackFile=myBigWigFilePath,
+        #     trackDb=trackDb,
+        # )
 
-        self.track = Track(
-            trackFile=myBigWigFilePath,
-            trackDb=trackDb,
-        )
-
-        print("- %s created in %s" % (trackName, myBigWigFilePath))
+        print("- BigWig %s created" % self.name_bigwig)
+        #print("- %s created in %s" % (trackName, myBigWigFilePath))
Binary file BigWig.pyc has changed
--- a/Datatype.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/Datatype.py	Wed Jul 27 10:10:49 2016 -0400
@@ -9,6 +9,8 @@
 import tempfile
 
 from util import subtools
+from Track import Track
+from TrackDb import TrackDb
 
 
 class Datatype(object):
@@ -25,6 +27,8 @@
     twoBitFile = None
     chromSizesFile = None
 
+    track = None
+
     def __init__(self):
 
         not_init_message = "The {0} is not initialized." \
@@ -37,6 +41,7 @@
             raise TypeError(not_init_message.format('tool directory'))
 
 
+
     @staticmethod
     def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
                  extra_files_path, tool_directory, specie_folder, tracks_folder):
@@ -66,3 +71,39 @@
         short_label_slice = slice(0, 15)
 
         return name_to_shortify[short_label_slice]
+
+    # TODO: Better handle parameters, use inheritance mechanism
+    # TODO: Use default parameters for some, like visibility
+    def createTrack(self, file_path=None, track_name=None, long_label=None,
+                    thick_draw_item='off', short_label=None, track_type=None,
+                    visibility=None, priority=None, track_file=None):
+        """Build the TrackDb entry and the Track for this datatype; store it in self.track.
+
+        file_path is appended to "tracks/" to form the data URL; track_file is
+        handed to Track as trackFile. When long_label is None it falls back to
+        track_name; when short_label is missing it is derived from long_label
+        with getShortName. Underscores in both labels are replaced by spaces.
+        The remaining arguments are passed to TrackDb unchanged.
+        """
+        # TODO: Remove the hardcoded "tracks" by the value used as variable from myTrackFolderPath
+        data_url = "tracks/%s" % file_path
+
+        # long_label defaults to None, which would crash in getShortName / replace below
+        long_label = (track_name or "") if long_label is None else long_label
+        if not short_label:
+            short_label = self.getShortName(long_label)
+
+        # Replace '_' by ' ', to invert the sanitization mechanism
+        # TODO: Find a better way to manage the sanitization of file path
+        long_label = long_label.replace("_", " ")
+        short_label = short_label.replace("_", " ")
+
+        track_db = TrackDb(
+                trackName=track_name, longLabel=long_label, shortLabel=short_label,
+                trackDataURL=data_url, trackType=track_type, visibility=visibility,
+                thickDrawItem=thick_draw_item, priority=priority,
+        )
+
+        # Keep the resulting Track on the instance (used by every datatype, not only Bam)
+        self.track = Track(trackFile=track_file,
+                           trackDb=track_db)
Binary file Datatype.pyc has changed
--- a/Gff3.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/Gff3.py	Wed Jul 27 10:10:49 2016 -0400
@@ -47,21 +47,27 @@
             subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name)
 
         # Create the Track Object
-        dataURL = "tracks/%s" % trackName
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_gff3, track_type='bigBed 12 +', visibility='dense', priority=self.priority,
+                         track_file=myBigBedFilePath)
 
-        trackDb = TrackDb(
-            trackName=trackName,
-            longLabel=self.name_gff3,
-            shortLabel=self.getShortName( self.name_gff3 ),
-            trackDataURL=dataURL,
-            trackType='bigBed 12 +',
-            visibility='dense',
-            priority=self.priority,
-        )
+        # dataURL = "tracks/%s" % trackName
+        #
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_gff3,
+        #     shortLabel=self.getShortName( self.name_gff3 ),
+        #     trackDataURL=dataURL,
+        #     trackType='bigBed 12 +',
+        #     visibility='dense',
+        #     priority=self.priority,
+        # )
+        #
+        # self.track = Track(
+        #     trackFile=myBigBedFilePath,
+        #     trackDb=trackDb,
+        # )
 
-        self.track = Track(
-            trackFile=myBigBedFilePath,
-            trackDb=trackDb,
-        )
-
-        print("- %s created in %s" % (trackName, myBigBedFilePath))
+        print("- Gff3 %s created" % self.name_gff3)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
Binary file Gff3.pyc has changed
--- a/Gtf.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/Gtf.py	Wed Jul 27 10:10:49 2016 -0400
@@ -21,7 +21,7 @@
         self.name_gtf = data_gtf["name"]
         self.priority = data_gtf["order_index"]
 
-        print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf)
+        #print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf)
 
         # TODO: See if we need these temporary files as part of the generated files
         genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
@@ -46,20 +46,26 @@
             subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name)
 
         # Create the Track Object
-        dataURL = "tracks/%s" % trackName
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_gtf, track_type='bigBed 12 +', visibility='dense', priority=self.priority,
+                         track_file=myBigBedFilePath)
+        #
+        # dataURL = "tracks/%s" % trackName
+        #
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_gtf,
+        #     shortLabel=self.getShortName( self.name_gtf ),
+        #     trackDataURL=dataURL,
+        #     trackType='bigBed 12 +',
+        #     visibility='dense',
+        #     priority=self.priority,
+        # )
+        # self.track = Track(
+        #     trackFile=myBigBedFilePath,
+        #     trackDb=trackDb,
+        # )
 
-        trackDb = TrackDb(
-            trackName=trackName,
-            longLabel=self.name_gtf,
-            shortLabel=self.getShortName( self.name_gtf ),
-            trackDataURL=dataURL,
-            trackType='bigBed 12 +',
-            visibility='dense',
-            priority=self.priority,
-        )
-        self.track = Track(
-            trackFile=myBigBedFilePath,
-            trackDb=trackDb,
-        )
-
-        print("- %s created in %s" % (trackName, myBigBedFilePath))
+        print("- Gtf %s created" % self.name_gtf)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
Binary file Gtf.pyc has changed
--- a/TrackHub.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/TrackHub.py	Wed Jul 27 10:10:49 2016 -0400
@@ -28,6 +28,7 @@
         self.reference_genome = inputFastaFile
         # TODO: Add the specie name
         self.genome_name = inputFastaFile.assembly_id
+        self.specie_html = self.genome_name + '.html'
         self.default_pos = None
         self.user_email = user_email
 
@@ -148,8 +149,7 @@
         self.__fillHubTxt__(hubTxtFilePath)
 
         # Add the hub.html file
-        # TODO: Change the name and get it depending on the specie
-        hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html')
+        hubHtmlFilePath = os.path.join(myHubPath, self.specie_html)
         self.__fillHubHtmlFile__(hubHtmlFilePath)
 
 
@@ -208,7 +208,7 @@
                 longLabel=self.genome_name,
                 genomesFile='genomes.txt',
                 email=self.user_email,
-                descriptionUrl='dbia.html'
+                descriptionUrl=self.specie_html
             )
             genomesTxtFile.write(htmlMakoRendered)
 
Binary file TrackHub.pyc has changed
--- a/hubArchiveCreator.py	Thu Jul 21 05:58:51 2016 -0400
+++ b/hubArchiveCreator.py	Wed Jul 27 10:10:49 2016 -0400
@@ -82,13 +82,16 @@
     input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
     genome_name = sanitize_name_input(args.genome_name)
 
-    reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+    reference_genome = Fasta(input_fasta_file,
                              input_fasta_file_name, genome_name)
 
     user_email = args.user_email
 
-    # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
 
+    # TODO: Use a class to have a better management of the structure of these inputs
+    # These inputs are populated in the Galaxy Wrapper xml and are in this format:
+    # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
+    # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH}
     array_inputs_gff3 = args.gff3
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
     array_inputs_bed_generic = args.bed
@@ -117,31 +120,14 @@
 
     all_datatype_dictionary = {}
 
-    datatype_parameters = (inputs_data, all_datatype_dictionary)
-
-    # Process Augustus
-    if array_inputs_gff3:
-        create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)
-
-    # Process Bed simple repeats
-    if array_inputs_bed_simple_repeats:
-        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
-
-    # Process Bed
-    if array_inputs_bed_generic:
-        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
-
-    # Process GTF
-    if array_inputs_gtf:
-        create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
-
-    # Process Bam
-    if array_inputs_bam:
-        create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
-
-    # Process BigWig
-    if array_inputs_bigwig:
-        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
+    for (inputs, datatype_class) in [(array_inputs_gff3, Gff3),
+                         (array_inputs_bed_simple_repeats, BedSimpleRepeats),
+                         (array_inputs_bed_generic, Bed),
+                         (array_inputs_gtf, Gtf),
+                         (array_inputs_bam, Bam),
+                         (array_inputs_bigwig, BigWig)]:
+        if inputs:
+            all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
 
     # Create Ordered Dictionary to add the tracks in the tool form order
     all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
@@ -155,13 +141,19 @@
     # We terminate le process and so create a HTML file summarizing all the files
     trackHub.terminate()
 
+    print "\t"
+    print "--------------"
+    print "Well done guys! Your data are ready to be displayed in UCSC Track Hub."
+
     sys.exit(0)
 
+
 def sanitize_name_input(string_to_sanitize):
         return string_to_sanitize \
             .replace("/", "_") \
             .replace(" ", "_")
 
+
 def sanitize_name_inputs(inputs_data):
     """
     Sometimes output from Galaxy, or even just file name from user have spaces
@@ -173,7 +165,7 @@
         inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
 
 
-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data):
     """
     Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
     and update the dictionary of datatype
@@ -189,9 +181,8 @@
         for key, data_value in inputs_data.items():
             if key == input_false_path:
                 extensionObject = ExtensionClass(input_false_path, data_value)
-
                 datatype_dictionary.update({data_value["order_index"]: extensionObject})
-    all_datatype_dictionary.update(datatype_dictionary)
+    return datatype_dictionary
 
 if __name__ == "__main__":
     main(sys.argv)
--- a/hubArchiveCreator.xml	Thu Jul 21 05:58:51 2016 -0400
+++ b/hubArchiveCreator.xml	Wed Jul 27 10:10:49 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="hubArchiveCreator" name="Hub Archive Creator" version="2.0.2">
+<tool id="hubArchiveCreator" name="Hub Archive Creator" version="2.3.0">
     <description>
         This Galaxy tool permits to prepare your files to be ready for
         Assembly Hub visualization.
@@ -11,9 +11,12 @@
         <requirement type="package" version="0.0.1">genePredToBed</requirement>
         <requirement type="package" version="1.2">samtools</requirement>
         <!-- Conda dependencies -->
+        <requirement type="package" version="332">ucsc-bedtobigbed</requirement>
+        <requirement type="package" version="332">ucsc-fatotwobit</requirement>
+        <requirement type="package" version="324">ucsc-genepredtobed</requirement>
         <requirement type="package" version="324">ucsc-gff3togenepred</requirement>
         <requirement type="package" version="324">ucsc-gtftogenepred</requirement>
-        <requirement type="package" version="324">ucsc-genepredtobed</requirement>
+        <requirement type="package" version="324">ucsc-twobitinfo</requirement>
         <requirement type="package" version="1.3.1">samtools</requirement>
     </requirements>
 
@@ -158,8 +161,8 @@
                 <when value="bed">
                     <conditional name="bedChoice">
                         <param name="bed_select" type="select" label="Bed Choice">
-                            <option value="bed" selected="true">Generic BED</option>
-                            <option value="bed_simple_repeats_option">BED simple repeats</option>
+                            <option value="bed" selected="true">BED Generic (bed3+)</option>
+                            <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option>
                         </param>
                         <when value="bed">
                             <param
Binary file util/Fasta.pyc has changed
Binary file util/subtools.pyc has changed