# HG changeset patch
# User rmarenco
# Date 1474817138 14400
# Node ID 3233451a3bd6e188873b6b7d03f4f9210ff1e3e2
# Parent 2a45cd656e8e61fa4564d5f2d361715228073407
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit fc73ec22a0db3ab09c4ac13dc58f0b54ae37845c
diff -r 2a45cd656e8e -r 3233451a3bd6 Bam.py
--- a/Bam.py Fri Sep 02 15:41:51 2016 -0400
+++ b/Bam.py Sun Sep 25 11:25:38 2016 -0400
@@ -5,6 +5,7 @@
Class to handle Bam files to UCSC TrackHub
"""
+import logging
import os
import shutil
@@ -31,6 +32,10 @@
self.priority = self.data_bam["order_index"]
self.index_bam = self.data_bam["index"]
+ # TODO: Think about how to avoid repetition of the color treatment
+ self.track_color = self.data_bam["track_color"]
+
+ logging.debug("Color of bam: {0}".format(self.track_color))
#print "Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam)
@@ -49,7 +54,8 @@
self.createTrack(file_path=self.name_bam,
track_name=self.name_bam,
long_label=self.name_bam, track_type='bam', visibility='pack', priority=self.priority,
- track_file=bam_index_file_path)
+ track_file=bam_index_file_path,
+ track_color=self.track_color)
#
# dataURL = "tracks/%s" % self.name_bam
#
diff -r 2a45cd656e8e -r 3233451a3bd6 Bam.pyc
Binary file Bam.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 Bed.py
--- a/Bed.py Fri Sep 02 15:41:51 2016 -0400
+++ b/Bed.py Sun Sep 25 11:25:38 2016 -0400
@@ -23,6 +23,7 @@
self.data_bed_generic = data_bed_generic
self.name_bed_generic = self.data_bed_generic["name"]
self.priority = self.data_bed_generic["order_index"]
+ self.track_color = self.data_bed_generic["track_color"]
# Sort processing
subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)
@@ -42,7 +43,8 @@
track_name=trackName,
long_label=self.name_bed_generic, track_type='bigBed', visibility='dense',
priority=self.priority,
- track_file=myBigBedFilePath)
+ track_file=myBigBedFilePath,
+ track_color=self.track_color)
# dataURL = "tracks/%s" % trackName
#
diff -r 2a45cd656e8e -r 3233451a3bd6 Bed.pyc
Binary file Bed.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 BedSimpleRepeats.py
--- a/BedSimpleRepeats.py Fri Sep 02 15:41:51 2016 -0400
+++ b/BedSimpleRepeats.py Sun Sep 25 11:25:38 2016 -0400
@@ -17,6 +17,7 @@
self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path
self.name_bed_simple_repeats = data_bed_simple_repeats["name"]
self.priority = data_bed_simple_repeats["order_index"]
+ self.track_color = data_bed_simple_repeats["track_color"]
sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
@@ -42,7 +43,8 @@
track_name=trackName,
long_label=self.name_bed_simple_repeats, track_type='bigBed 4 +', visibility='dense',
priority=self.priority,
- track_file=myBigBedFilePath)
+ track_file=myBigBedFilePath,
+ track_color=self.track_color)
# dataURL = "tracks/%s" % trackName
#
diff -r 2a45cd656e8e -r 3233451a3bd6 BedSimpleRepeats.pyc
Binary file BedSimpleRepeats.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 BigWig.py
--- a/BigWig.py Fri Sep 02 15:41:51 2016 -0400
+++ b/BigWig.py Sun Sep 25 11:25:38 2016 -0400
@@ -18,6 +18,7 @@
self.input_bigwig_path = input_bigwig_path
self.name_bigwig = data_bigwig["name"]
self.priority = data_bigwig["order_index"]
+ self.track_color = data_bigwig["track_color"]
#print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig )
@@ -32,7 +33,8 @@
long_label=self.name_bigwig,
track_type='bigWig', visibility='full',
priority=self.priority,
- track_file=myBigWigFilePath)
+ track_file=myBigWigFilePath,
+ track_color=self.track_color)
# dataURL = "tracks/%s" % trackName
#
diff -r 2a45cd656e8e -r 3233451a3bd6 BigWig.pyc
Binary file BigWig.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 Datatype.py
--- a/Datatype.py Fri Sep 02 15:41:51 2016 -0400
+++ b/Datatype.py Sun Sep 25 11:25:38 2016 -0400
@@ -76,7 +76,7 @@
file_path=None,
track_name=None, long_label=None, thick_draw_item='off',
short_label=None, track_type=None, visibility=None, priority=None,
- track_file=None):
+ track_file=None, track_color='#000000'):
# TODO: Remove the hardcoded "tracks" by the value used as variable from myTrackFolderPath
data_url = "tracks/%s" % file_path
@@ -89,6 +89,12 @@
long_label = long_label.replace("_", " ")
short_label = short_label.replace("_", " ")
+ # TODO: Check whether the color is given as rgb or hex
+ # Convert the hex color string to an rgb array
+ hexa_without_sharp = track_color.lstrip('#')
+ rgb_array = [int(hexa_without_sharp[i:i+2], 16) for i in (0, 2, 4)]
+ rgb_ucsc = ','.join(map(str, rgb_array))
+
track_db = TrackDb(
trackName=track_name,
longLabel=long_label,
@@ -98,6 +104,7 @@
visibility=visibility,
thickDrawItem=thick_draw_item,
priority=priority,
+ track_color=rgb_ucsc
)
# Return the Bam Track Object
diff -r 2a45cd656e8e -r 3233451a3bd6 Datatype.pyc
Binary file Datatype.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 Gff3.py
--- a/Gff3.py Fri Sep 02 15:41:51 2016 -0400
+++ b/Gff3.py Sun Sep 25 11:25:38 2016 -0400
@@ -19,6 +19,7 @@
self.input_Gff3_false_path = input_Gff3_false_path
self.name_gff3 = data_gff3["name"]
self.priority = data_gff3["order_index"]
+ self.track_color = data_gff3["track_color"]
# TODO: See if we need these temporary files as part of the generated files
unsorted_genePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
@@ -57,6 +58,7 @@
long_label=self.name_gff3,
track_type='bigGenePred', visibility='dense',
priority=self.priority,
- track_file=myBigBedFilePath)
+ track_file=myBigBedFilePath,
+ track_color=self.track_color)
print("- Gff3 %s created" % self.name_gff3)
diff -r 2a45cd656e8e -r 3233451a3bd6 Gff3.pyc
Binary file Gff3.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 Gtf.py
--- a/Gtf.py Fri Sep 02 15:41:51 2016 -0400
+++ b/Gtf.py Sun Sep 25 11:25:38 2016 -0400
@@ -20,6 +20,7 @@
self.input_gtf_false_path = input_gtf_false_path
self.name_gtf = data_gtf["name"]
self.priority = data_gtf["order_index"]
+ self.track_color = data_gtf["track_color"]
#print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf)
@@ -59,6 +60,7 @@
track_name=trackName,
long_label=self.name_gtf, track_type='bigGenePred',
visibility='dense', priority=self.priority,
- track_file=myBigBedFilePath)
+ track_file=myBigBedFilePath,
+ track_color=self.track_color)
print("- Gtf %s created" % self.name_gtf)
diff -r 2a45cd656e8e -r 3233451a3bd6 Gtf.pyc
Binary file Gtf.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 Psl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Psl.py Sun Sep 25 11:25:38 2016 -0400
@@ -0,0 +1,57 @@
+import logging
+import os
+import tempfile
+
+# Internal dependencies
+from Datatype import Datatype
+from util import subtools
+
+
+class Psl(Datatype):
+    """Convert a PSL alignment file into a bigPsl track for a UCSC TrackHub."""
+
+    def __init__(self, input_psl_path, data_psl):
+        """Build the bigPsl file in the track folder and register its track.
+
+        :param input_psl_path: path of the PSL file to convert
+        :param data_psl: dict read for "name", "order_index" and "track_color"
+        """
+        super(Psl, self).__init__()
+        self.track = None
+
+        self.input_psl_path = input_psl_path
+        self.name_psl = data_psl["name"]
+        self.priority = data_psl["order_index"]
+        self.track_color = data_psl["track_color"]
+
+        # Temporary files (NamedTemporaryFile removes them once they are closed)
+        unsorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='.psl')
+        sorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='.psl')
+
+        # Get the bed12+12 with pslToBigPsl
+        subtools.pslToBigPsl(input_psl_path, unsorted_bed_formatted_psl_file.name)
+        # Sort the formatted psl into sorted_bed_formatted_psl_file
+        subtools.sort(unsorted_bed_formatted_psl_file.name, sorted_bed_formatted_psl_file.name)
+
+        # Get the binary indexed bigPsl with bedToBigBed
+        trackName = "".join((self.name_psl, ".bb"))
+        auto_sql_option = os.path.join(self.tool_directory, 'bigPsl.as')
+        my_big_psl_file_path = os.path.join(self.myTrackFolderPath, trackName)
+        logging.debug("Creating bigPsl %s (autoSql: %s)", my_big_psl_file_path, auto_sql_option)
+        with open(my_big_psl_file_path, 'w') as big_psl_file:
+            subtools.bedToBigBed(sorted_bed_formatted_psl_file.name,
+                                 self.chromSizesFile.name,
+                                 big_psl_file.name,
+                                 autoSql=auto_sql_option,
+                                 typeOption='bed12+12',
+                                 tab=True)
+
+        # Create the Track Object
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_psl,
+                         track_type='bigPsl', visibility='dense',
+                         priority=self.priority,
+                         track_file=my_big_psl_file_path,
+                         track_color=self.track_color)
+        print("- BigPsl %s created" % self.name_psl)
\ No newline at end of file
diff -r 2a45cd656e8e -r 3233451a3bd6 Psl.pyc
Binary file Psl.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 TrackDb.py
--- a/TrackDb.py Fri Sep 02 15:41:51 2016 -0400
+++ b/TrackDb.py Sun Sep 25 11:25:38 2016 -0400
@@ -4,7 +4,7 @@
"""docstring for TrackDb"""
def __init__(self, trackName="", longLabel="", shortLabel="", trackDataURL="", trackType="", visibility="",
- thickDrawItem='off', priority="0"):
+ thickDrawItem='off', priority="0", track_color="#000000"):
super(TrackDb, self).__init__()
self.trackName = trackName
@@ -15,3 +15,4 @@
self.visibility = visibility
self.thickDrawItem = thickDrawItem
self.priority = priority
+ self.track_color = track_color
diff -r 2a45cd656e8e -r 3233451a3bd6 TrackDb.pyc
Binary file TrackDb.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 TrackHub.py
--- a/TrackHub.py Fri Sep 02 15:41:51 2016 -0400
+++ b/TrackHub.py Sun Sep 25 11:25:38 2016 -0400
@@ -1,6 +1,7 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
+import logging
import os
import tempfile
import shutil
@@ -88,14 +89,36 @@
# TODO: We are basically looping two times: One time with os.walk, Second time
# with the template. We could improve that if the number of files begins to be really important
list_relative_file_path = [ ]
+
+ # TODO: Create a Tree class to manage this => Better readability and maintainability
+ def create_tree(array_path, tree, relative_array_file_path, level=0):
+ cur_relative_file_path = '/'.join(relative_array_file_path[:level+1])
+ if array_path[0] in tree.keys():
+ create_tree(array_path[1:], tree[array_path[0]][0],
+ relative_array_file_path, level+1)
+ else:
+ tree[array_path[0]] = ({}, cur_relative_file_path)
+ # TODO: Manage also the links of the directories => No link?
+ # => Managed in display.txt, but could also be managed there
+ # If we don't have any sub-vertices
+ if len(array_path) == 1:
+ # We create the path to it
+ return
+ else:
+ create_tree(array_path[1:], tree[array_path[0]][0],
+ relative_array_file_path, level + 1)
+
+ walkable_tree = {}
for root, dirs, files in os.walk(self.extra_files_path):
+ # Prepare the tree used to perform a Depth First Search
for file in files:
relative_directory = os.path.relpath(root, self.extra_files_path)
relative_file_path = os.path.join(relative_directory, file)
- list_relative_file_path.append(relative_file_path)
+ array_path = relative_file_path.split('/')
+ create_tree(array_path, walkable_tree, array_path, 0)
htmlMakoRendered = mytemplate.render(
- list_relative_file_path=list_relative_file_path
+ walkable_tree=walkable_tree
)
htmlOutput.write(htmlMakoRendered)
diff -r 2a45cd656e8e -r 3233451a3bd6 TrackHub.pyc
Binary file TrackHub.pyc has changed
diff -r 2a45cd656e8e -r 3233451a3bd6 bigPsl.as
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bigPsl.as Sun Sep 25 11:25:38 2016 -0400
@@ -0,0 +1,33 @@
+table bigPsl
+"bigPsl pairwise alignment"
+ (
+ string chrom; "Reference sequence chromosome or scaffold"
+ uint chromStart; "Start position in chromosome"
+ uint chromEnd; "End position in chromosome"
+ string name; "Name or ID of item, ideally both human readable and unique"
+ uint score; "Score (0-1000)"
+ char[1] strand; "+ or - for strand"
+ uint thickStart; "Start of where display should be thick (start codon)"
+ uint thickEnd; "End of where display should be thick (stop codon)"
+ uint reserved; "RGB value (use R,G,B string in input file)"
+ int blockCount; "Number of blocks"
+ int[blockCount] blockSizes; "Comma separated list of block sizes"
+ int[blockCount] chromStarts; "Start positions relative to chromStart"
+
+ uint oChromStart;"Start position in other chromosome"
+ uint oChromEnd; "End position in other chromosome"
+ char[1] oStrand; "+ or - for other strand"
+ uint oChromSize; "Size of other chromosome."
+ int[blockCount] oChromStarts; "Start positions relative to oChromStart"
+
+ lstring oSequence; "Sequence on other chrom (or edit list, or empty)"
+ string oCDS; "CDS in NCBI format"
+
+ uint chromSize;"Size of target chromosome"
+
+ uint match; "Number of bases matched."
+ uint misMatch; " Number of bases that don't match "
+ uint repMatch; " Number of bases that match but are part of repeats "
+ uint nCount; " Number of 'N' bases "
+ )
+
diff -r 2a45cd656e8e -r 3233451a3bd6 hubArchiveCreator.py
--- a/hubArchiveCreator.py Fri Sep 02 15:41:51 2016 -0400
+++ b/hubArchiveCreator.py Sun Sep 25 11:25:38 2016 -0400
@@ -24,6 +24,7 @@
from util.Filters import TraceBackFormatter
from Gff3 import Gff3
from Gtf import Gtf
+from Psl import Psl
from TrackHub import TrackHub
# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
@@ -54,6 +55,9 @@
# Bam Management
parser.add_argument('--bam', action='append', help='Bam format')
+ # Psl Management
+ parser.add_argument('--psl', action='append', help='Psl format')
+
# TODO: Check if the running directory can have issues if we run the tool outside
parser.add_argument('-d', '--directory',
help='Running tool directory, where to find the templates. Default is running directory')
@@ -106,12 +110,13 @@
# These inputs are populated in the Galaxy Wrapper xml and are in this format:
# ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
# EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH}
- array_inputs_gff3 = args.gff3
- array_inputs_bed_simple_repeats = args.bedSimpleRepeats
+ array_inputs_bam = args.bam
array_inputs_bed_generic = args.bed
+ array_inputs_bed_simple_repeats = args.bedSimpleRepeats
+ array_inputs_bigwig = args.bigwig
+ array_inputs_gff3 = args.gff3
array_inputs_gtf = args.gtf
- array_inputs_bam = args.bam
- array_inputs_bigwig = args.bigwig
+ array_inputs_psl = args.psl
outputFile = args.output
@@ -129,12 +134,14 @@
all_datatype_dictionary = {}
- for (inputs, datatype_class) in [(array_inputs_gff3, Gff3),
- (array_inputs_bed_simple_repeats, BedSimpleRepeats),
- (array_inputs_bed_generic, Bed),
- (array_inputs_gtf, Gtf),
- (array_inputs_bam, Bam),
- (array_inputs_bigwig, BigWig)]:
+ for (inputs, datatype_class) in [
+ (array_inputs_bam, Bam),
+ (array_inputs_bed_generic, Bed),
+ (array_inputs_bigwig, BigWig),
+ (array_inputs_bed_simple_repeats, BedSimpleRepeats),
+ (array_inputs_gff3, Gff3),
+ (array_inputs_gtf, Gtf),
+ (array_inputs_psl, Psl)]:
if inputs:
all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
@@ -187,6 +194,8 @@
for input_false_path in array_inputs:
for key, data_value in inputs_data.items():
if key == input_false_path:
+ logging.debug("input_false_path: " + input_false_path)
+ logging.debug("data_value: " + str(data_value))
extensionObject = ExtensionClass(input_false_path, data_value)
datatype_dictionary.update({data_value["order_index"]: extensionObject})
return datatype_dictionary
diff -r 2a45cd656e8e -r 3233451a3bd6 hubArchiveCreator.xml
--- a/hubArchiveCreator.xml Fri Sep 02 15:41:51 2016 -0400
+++ b/hubArchiveCreator.xml Sun Sep 25 11:25:38 2016 -0400
@@ -12,6 +12,7 @@