changeset 24:fcc1021bd496 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 93e2e2fb59f99677425104a80c17f665fa7b2b4a-dirty
author yating-l
date Wed, 31 May 2017 11:35:16 -0400
parents 2677f1899aa8
children 99dad5f9444c
files Bam.pyc Bed.pyc BedBlastAlignments.pyc BedSimpleRepeats.pyc BedSpliceJunctions.pyc BigBed.py BigWig.py BigWig.pyc Datatype.pyc Gff3.pyc Gtf.pyc Psl.pyc bigPsl.pyc cytoBand.py cytoBandIdeo.as hubArchiveCreator.py hubArchiveCreator.xml templates/trackDb/layout.txt tool_dependencies.xml util/subtools.py
diffstat 20 files changed, 293 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
Binary file Bam.pyc has changed
Binary file Bed.pyc has changed
Binary file BedBlastAlignments.pyc has changed
Binary file BedSimpleRepeats.pyc has changed
Binary file BedSpliceJunctions.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BigBed.py	Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+
+import os
+import shutil
+from subprocess import Popen, PIPE
+import re
+
+# Internal dependencies
+from Datatype import Datatype
+
+class BigBed(Datatype):
+    """ Configurations for creating the bigBed evidence track """
+
+    def __init__(self, input_bigbed_path, data_bigbed):
+        """
+        :param input_bigbed_path: path to the input bigBed file
+        :param data_bigbed: dict read for the keys "name", "order_index",
+            "track_color", "group_name" and "long_label"
+        """
+        super(BigBed, self).__init__()
+        self.track = None
+        self.input_bigbed_path = input_bigbed_path
+        self.name_bigbed = data_bigbed["name"]
+        self.priority = data_bigbed["order_index"]
+        self.track_color = data_bigbed["track_color"]
+        self.group_name = data_bigbed["group_name"]
+
+        track_name = "".join((self.name_bigbed, ".bigbed"))
+        if data_bigbed["long_label"]:
+            self.long_label = data_bigbed["long_label"]
+        else:
+            self.long_label = self.name_bigbed
+
+        bigbed_file_path = os.path.join(self.myTrackFolderPath, track_name)
+        track_type = self.determine_track_type(input_bigbed_path)
+        shutil.copy(self.input_bigbed_path, bigbed_file_path)
+
+        # Create the Track Object
+        self.createTrack(file_path=track_name,
+                         track_name=track_name,
+                         long_label=self.long_label,
+                         track_type=track_type,
+                         visibility='hide',
+                         priority=self.priority,
+                         track_file=bigbed_file_path,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        print("- BigBed %s created" % self.name_bigbed)
+
+    def determine_track_type(self, bb_file):
+        """
+        Determine the number of standard and extra fields using bigBedSummary
+
+        Implementation of reading from stdout is based on a Stackoverflow post:
+        http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate
+
+        :param bb_file: path to a bigBed file
+        :returns: "bigBed <N> ." when both reported counts are equal, else "bigBed <N> +"
+        :raises ValueError: if bigBedSummary prints no field summary line
+        """
+        cmd_ph = Popen(["bigBedSummary", "-fields", bb_file, "stdout"],
+                       stdout=PIPE, bufsize=1)
+
+        pattern = r"(\d+) bed definition fields, (\d+) total fields"
+        bed_type = None  # stays None when no output line matches the pattern
+
+        with cmd_ph.stdout:
+            for line in iter(cmd_ph.stdout.readline, b''):
+                match = re.match(pattern, line)
+                if match:
+                    extra_mark = "." if match.group(1) == match.group(2) else "+"
+                    bed_type = "bigBed %s %s" % (match.group(1), extra_mark)
+                    break
+
+        cmd_ph.wait()
+        if bed_type is None:
+            raise ValueError("bigBedSummary reported no field counts for %s" % bb_file)
+        return bed_type
--- a/BigWig.py	Tue May 09 15:42:43 2017 -0400
+++ b/BigWig.py	Wed May 31 11:35:16 2017 -0400
@@ -2,6 +2,8 @@
 
 import os
 import shutil
+from subprocess import Popen, PIPE
+import re
 
 # Internal dependencies
 from Datatype import Datatype
@@ -36,30 +38,37 @@
         self.createTrack(file_path=trackName,
                          track_name=trackName,
                          long_label=self.long_label,
-                         track_type='bigWig', visibility='full',
+                         track_type=self.determine_track_type(myBigWigFilePath),
+                         visibility='full',
                          priority=self.priority,
                          track_file=myBigWigFilePath,
                          track_color=self.track_color,
                          group_name=self.group_name)
 
-        # dataURL = "tracks/%s" % trackName
-        #
-        # # Return the BigBed track
-        #
-        # trackDb = TrackDb(
-        #     trackName=trackName,
-        #     longLabel=self.name_bigwig,
-        #     shortLabel=self.getShortName( self.name_bigwig ),
-        #     trackDataURL=dataURL,
-        #     trackType='bigWig',
-        #     visibility='full',
-        #     priority=self.priority,
-        # )
-        #
-        # self.track = Track(
-        #     trackFile=myBigWigFilePath,
-        #     trackDb=trackDb,
-        # )
-
         print("- BigWig %s created" % self.name_bigwig)
         #print("- %s created in %s" % (trackName, myBigWigFilePath))
+
+    def determine_track_type(self, bw_file):
+        """
+        Build the trackDb type of a bigWig file from ``bigWigInfo -minMax``.
+
+        bigWig tracks must declare the expected signal range for the data
+        (See https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html).
+        Implementation of reading from stdout is based on a Stackoverflow post:
+        http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate
+
+        :param bw_file: path to a bigWig file
+        :returns: "bigWig " followed by the last line printed by bigWigInfo
+        :raises ValueError: if bigWigInfo prints nothing on stdout
+        """
+        cmd_ph = Popen(["bigWigInfo", "-minMax", bw_file],
+                       stdout=PIPE, bufsize=1)
+        bw_type = None  # stays None when the command prints nothing
+        with cmd_ph.stdout:
+            for line in iter(cmd_ph.stdout.readline, b''):
+                bw_type = "bigWig %s" % line.rstrip()
+
+        cmd_ph.wait()
+        if bw_type is None:
+            raise ValueError("bigWigInfo reported no min/max for %s" % bw_file)
+        return bw_type
Binary file BigWig.pyc has changed
Binary file Datatype.pyc has changed
Binary file Gff3.pyc has changed
Binary file Gtf.pyc has changed
Binary file Psl.pyc has changed
Binary file bigPsl.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cytoBand.py	Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class cytoBand( Datatype ):
+    """
+    Configurations for creating the cytoBandIdeo bigBed track.
+
+    The input BED file is sorted, converted to bigBed using the
+    cytoBandIdeo.as autoSql definition and registered through createTrack.
+    """
+
+    def __init__(self, input_bed_cytoBand_false_path, data_bed_cytoBand):
+        """
+        :param input_bed_cytoBand_false_path: path to the input cytoBand BED file
+        :param data_bed_cytoBand: dict read for the keys "name", "order_index",
+            "track_color", "group_name" and "long_label"
+        """
+        super(cytoBand, self).__init__()
+
+        self.input_bed_cytoBand_false_path = input_bed_cytoBand_false_path
+        self.name_bed_cytoBand = data_bed_cytoBand["name"]
+        self.priority = data_bed_cytoBand["order_index"]
+        self.track_color = data_bed_cytoBand["track_color"]
+        # TODO: Think about how to avoid repetition of the group_name everywhere
+        self.group_name = data_bed_cytoBand["group_name"]
+        # Fall back to the track name when no long label is provided
+        if data_bed_cytoBand["long_label"]:
+            self.long_label = data_bed_cytoBand["long_label"]
+        else:
+            self.long_label = self.name_bed_cytoBand
+
+        # Temporary file whose path is passed to subtools.sort and bedToBigBed
+        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        # Sort processing
+        subtools.sort(self.input_bed_cytoBand_false_path, sortedBedFile.name)
+
+        # bedToBigBed processing
+        # TODO: Change the name of the bb, to tool + genome + .bb
+        trackName = "".join( ( self.name_bed_cytoBand, '.bb' ) )
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+
+        auto_sql_option = os.path.join(self.tool_directory, 'cytoBandIdeo.as')
+
+        # Only the path (bigBedFile.name) is passed on; the file object itself
+        # is not written to here
+        with open(myBigBedFilePath, 'w') as bigBedFile:
+            subtools.bedToBigBed(sortedBedFile.name,
+                                 self.chromSizesFile.name,
+                                 bigBedFile.name,
+                                 typeOption='bed4',
+                                 autoSql=auto_sql_option)
+
+        # Create the Track Object
+        # NOTE(review): track name is hard-coded to 'cytoBandIdeo' rather than derived from trackName; presumably required by the browser, confirm
+        self.createTrack(file_path=trackName,
+                         track_name='cytoBandIdeo',
+                         long_label=self.long_label,
+                         track_type='bigBed',
+                         visibility='dense',
+                         priority=self.priority,
+                         track_file=myBigBedFilePath,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        print("- cytoBand %s created" % self.name_bed_cytoBand)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cytoBandIdeo.as	Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,9 @@
+table cytoBandIdeo
+"cytoBandIdeo Describes the positions of cytogenetic bands with a chromosome"
+    (
+    string chrom;      "Reference sequence chromosome or scaffold"
+    uint   chromStart; "Start position in chromosome"
+    uint   chromEnd;   "End position in chromosome"
+    string name;       "Name of item"
+    string   gieStain; "Giemsa stain results: gneg gpos25 gpos50 gpos75 gpos100 acen gvar stalk"
+    )
\ No newline at end of file
--- a/hubArchiveCreator.py	Tue May 09 15:42:43 2017 -0400
+++ b/hubArchiveCreator.py	Wed May 31 11:35:16 2017 -0400
@@ -20,6 +20,7 @@
 from BedSimpleRepeats import BedSimpleRepeats
 from BedSpliceJunctions import BedSpliceJunctions
 from Bed import Bed
+from cytoBand import cytoBand
 from BigWig import BigWig
 from util.Fasta import Fasta
 from util.Filters import TraceBackFormatter
@@ -29,6 +30,7 @@
 from TrackHub import TrackHub
 from bigPsl import bigPsl
 from BedBlastAlignments import BedBlastAlignments
+from BigBed import BigBed
 
 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
 
@@ -55,6 +57,9 @@
     # Generic Bed (Blastx transformed to bed)
     parser.add_argument('--bed', action='append', help='Bed generic format')
 
+    #cytoBandIdeo
+    parser.add_argument('--cytoBand', action='append', help='Cytoband Track, using cytoBandIdeo.as')
+
     # BigPsl (blat alignment)
     parser.add_argument('--bigpsl', action='append', help='bigPsl format, using bigPsl.as')
 
@@ -70,6 +75,9 @@
     # Psl Management
     parser.add_argument('--psl', action='append', help='Psl format')
 
+    # BigBed Management
+    parser.add_argument('--bigbed', action='append', help='BigBed format')
+
     # TODO: Check if the running directory can have issues if we run the tool outside
     parser.add_argument('-d', '--directory',
                         help='Running tool directory, where to find the templates. Default is running directory')
@@ -124,6 +132,7 @@
     # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH}
     array_inputs_bam = args.bam
     array_inputs_bed_generic = args.bed
+    array_inputs_bed_cytoBand = args.cytoBand
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
     array_inputs_bed_splice_junctions = args.bedSpliceJunctions
     array_inputs_bigwig = args.bigwig
@@ -132,6 +141,7 @@
     array_inputs_psl = args.psl
     array_inputs_bigpsl = args.bigpsl
     array_inputs_bed_blast_alignments = args.bedBlastAlignments
+    array_inputs_bigbed = args.bigbed
 
     outputFile = args.output
 
@@ -152,6 +162,7 @@
     for (inputs, datatype_class) in [
                         (array_inputs_bam, Bam),
                         (array_inputs_bed_generic, Bed),
+                        (array_inputs_bed_cytoBand, cytoBand),
                         (array_inputs_bigwig, BigWig),
                         (array_inputs_bed_simple_repeats, BedSimpleRepeats),
                         (array_inputs_bed_splice_junctions, BedSpliceJunctions),
@@ -159,7 +170,8 @@
                         (array_inputs_gtf, Gtf),
                         (array_inputs_psl, Psl),
                         (array_inputs_bigpsl, bigPsl),
-                        (array_inputs_bed_blast_alignments, BedBlastAlignments)]:
+                        (array_inputs_bed_blast_alignments, BedBlastAlignments),
+                        (array_inputs_bigbed, BigBed)]:
         if inputs:
             all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
 
--- a/hubArchiveCreator.xml	Tue May 09 15:42:43 2017 -0400
+++ b/hubArchiveCreator.xml	Wed May 31 11:35:16 2017 -0400
@@ -5,8 +5,10 @@
     </description>
 
     <requirements>
-        <requirement type="package" version="1.0">ucsc_tools_340</requirement>
+        <requirement type="package" version="340">ucsc_hac</requirement>
         <requirement type="package" version="1.2">samtools</requirement>
+        <requirement type="package" version="340">ucsc_bigwig</requirement>
+        <requirement type="package" version="340">ucsc_bigbed</requirement>
     </requirements>
 
     <stdio>
@@ -90,6 +92,11 @@
                         #silent $prepare_json($f.formatChoice.bedChoice.BED, $index_track_final,
                                                 extra_data_dict)
                     #end if
+                    #if $f.formatChoice.bedChoice.bed_select == "bed_cytoBand"
+                        --cytoBand $f.formatChoice.bedChoice.BED_cytoBand
+                        #silent $prepare_json($f.formatChoice.bedChoice.BED_cytoBand, $index_track_final,
+                                                extra_data_dict)
+                    #end if
                     #if $f.formatChoice.bedChoice.bed_select == "bed_simple_repeats_option"
                         --bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats
                         #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final,
@@ -102,12 +109,12 @@
                     #end if
                     #if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option"
                         --bedBlastAlignments $f.formatChoice.bedChoice.BED_blast_alignment
-                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,                              
+                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,
                                              extra_data_dict)
                     #end if
                     #if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option"
                         --bigpsl $f.formatChoice.bedChoice.BED_blat_alignment
-                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,                              
+                        #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,
                                              extra_data_dict)
                     #end if
                 #end if
@@ -121,6 +128,11 @@
                     #silent $prepare_json($f.formatChoice.BIGWIG, $index_track_final,
                                             extra_data_dict)
                 #end if
+                #if $f.formatChoice.format_select == "bigbed"
+                    --bigbed $f.formatChoice.BIGBED
+                    #silent $prepare_json($f.formatChoice.BIGBED, $index_track_final,
+                                            extra_data_dict)
+                #end if
                 #if $f.formatChoice.format_select == "gff3"
                     --gff3 $f.formatChoice.GFF3
                     #silent $prepare_json($f.formatChoice.GFF3, $index_track_final,
@@ -180,6 +192,7 @@
                         <option value="bed">BED</option>
                         <option value="psl">PSL</option>
                         <option value="bigwig">BIGWIG</option>
+                        <option value="bigbed">BIGBED</option>
                         <option value="gff3">GFF3</option>
                         <option value="gtf">GTF</option>
                     </param>
@@ -200,11 +213,12 @@
                                 </valid>
                             </sanitizer>
                         </param>
-                    </when>     
+                    </when>
                     <when value="bed">
                         <conditional name="bedChoice">
                             <param name="bed_select" type="select" label="Bed Choice">
                                 <option value="bed_generic" selected="true">BED Generic (bed3+)</option>
+                                <option value="bed_cytoBand">cytoBand Track (bed4)</option>
                                 <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option>
                                 <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option>
                                 <option value="bed_blast_alignment_option">Blast alignments (bed12+12 / bigPsl.as)</option>
@@ -219,6 +233,14 @@
                                 />
 
                             </when>
+                            <when value="bed_cytoBand">
+                                <param
+                                        format="bed"
+                                        name="BED_cytoBand"
+                                        type="data"
+                                        label="cytoBand Track (Bed4)"
+                                />
+                            </when>
                             <when value="bed_simple_repeats_option">
                                 <param
                                         format="bed"
@@ -293,6 +315,22 @@
                             </sanitizer>
                         </param>
                     </when>
+                    <when value="bigbed">
+                        <param
+                                format="bigbed"
+                                name="BIGBED"
+                                type="data"
+                                label="BIGBED File"
+                        />
+                        <param name="longLabel" type="text" size="30" label="Track name" />
+                        <param name="track_color" type="color" label="Track color" value="#000000">
+                            <sanitizer>
+                                <valid initial="string.letters,string.digits">
+                                    <add value="#"/>
+                                </valid>
+                            </sanitizer>
+                        </param>
+                    </when>
                     <when value="gff3">
                         <param
                                 format="gff3"
@@ -474,7 +512,7 @@
                 />
             </output>
         </test>
-        
+
 
         <!-- Test with Psl -->
         <test>
@@ -1029,4 +1067,8 @@
         This Galaxy tool permits to prepare your files to be ready for
         Assembly Hub visualization.
     </help>
+
+    <citations>
+        <citation type="doi">10.7490/f1000research.1112719.1</citation>
+    </citations>
 </tool>
--- a/templates/trackDb/layout.txt	Tue May 09 15:42:43 2017 -0400
+++ b/templates/trackDb/layout.txt	Wed May 31 11:35:16 2017 -0400
@@ -1,14 +1,32 @@
 % for trackDb in trackDbs:
-    ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
-    track ${trackDb.trackName}
-    longLabel ${trackDb.longLabel}
-    shortLabel ${trackDb.shortLabel}
-    bigDataUrl ${trackDb.trackDataURL}
-    type ${trackDb.trackType}
-    visibility ${trackDb.visibility}
-    thickDrawItem ${trackDb.thickDrawItem}
-    priority ${trackDb.priority}
-    color ${trackDb.track_color}
-    group ${trackDb.group_name.lower().replace(' ', '_')}
+    % if "bigWig" in trackDb.trackType:
+
+track ${trackDb.trackName}
+longLabel ${trackDb.longLabel}
+shortLabel ${trackDb.shortLabel}
+bigDataUrl ${trackDb.trackDataURL}
+visibility ${trackDb.visibility}
+priority ${trackDb.priority}
+color ${trackDb.track_color}
+group ${trackDb.group_name.lower().replace(' ', '_')}
+type ${trackDb.trackType}
+autoScale on
+maxHeightPixels 100:32:8
+windowingFunction mean+whiskers
 
+    % else:
+
+## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
+track ${trackDb.trackName}
+longLabel ${trackDb.longLabel}
+shortLabel ${trackDb.shortLabel}
+bigDataUrl ${trackDb.trackDataURL}
+type ${trackDb.trackType}
+visibility ${trackDb.visibility}
+thickDrawItem ${trackDb.thickDrawItem}
+priority ${trackDb.priority}
+color ${trackDb.track_color}
+group ${trackDb.group_name.lower().replace(' ', '_')}
+
+    % endif
 % endfor
--- a/tool_dependencies.xml	Tue May 09 15:42:43 2017 -0400
+++ b/tool_dependencies.xml	Wed May 31 11:35:16 2017 -0400
@@ -1,6 +1,5 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <!-- UCSC Tools in  -->
     <!-- Useful for HAC are:
         - twoBitInfo
         - sort
@@ -16,23 +15,15 @@
     <package name="samtools" version="1.2">
         <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    
-    <package name="ucsc_tools_340" version="1.0">
-        <install version="1.0">
-            <actions_group>
-                <actions architecture="x86_64" os="linux">
-                    <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action>
-                    <action type="move_directory_files">
-                        <source_directory>.</source_directory>                       
-                        <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                    </action>    
-                </actions>
-                <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions_group>
-         </install>
-         <readme>The well known UCSC tools from Jim Kent.</readme>
+    <package name="ucsc_hac" version="340">
+        <repository changeset_revision="a819439b6d76" name="package_ucsc_hac_340" owner="yating-l" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="ucsc_bigbed" version="340">
+        <repository changeset_revision="b23d341df89d" name="package_ucsc_bigbed_340" owner="yating-l" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    
+
+    <package name="ucsc_bigwig" version="340">      
+        <repository changeset_revision="b1fb46f92da0" name="package_ucsc_bigwig_340" owner="yating-l" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+
 </tool_dependency>
--- a/util/subtools.py	Tue May 09 15:42:43 2017 -0400
+++ b/util/subtools.py	Wed May 31 11:35:16 2017 -0400
@@ -250,6 +250,8 @@
 # See the "track" Common settings at:
 #https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments
 def fixName(filename):
+    if filename == 'cytoBandIdeo':
+        return filename
     valid_chars = "_%s%s" % (string.ascii_letters, string.digits)
     sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename])
     sanitize_name = "gonramp_" + sanitize_name