# HG changeset patch
# User yating-l
# Date 1496244916 14400
# Node ID fcc1021bd49607d3bc219b19636ee0f4e15ec7b0
# Parent 2677f1899aa861849c1a210e1474e01c41d14597
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 93e2e2fb59f99677425104a80c17f665fa7b2b4a-dirty
diff -r 2677f1899aa8 -r fcc1021bd496 Bam.pyc
Binary file Bam.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 Bed.pyc
Binary file Bed.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 BedBlastAlignments.pyc
Binary file BedBlastAlignments.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 BedSimpleRepeats.pyc
Binary file BedSimpleRepeats.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 BedSpliceJunctions.pyc
Binary file BedSpliceJunctions.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 BigBed.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BigBed.py Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+
+import os
+import shutil
+from subprocess import Popen, PIPE
+import re
+
+# Internal dependencies
+from Datatype import Datatype
+
+class BigBed(Datatype):
+    """Configuration for creating a bigBed evidence track."""
+
+    def __init__(self, input_bigbed_path, data_bigbed):
+        """
+        Copy the bigBed file into the track folder and register its track.
+
+        :param input_bigbed_path: path to the input bigBed file
+        :param data_bigbed: dict read for the keys "name", "order_index",
+            "track_color", "group_name" and "long_label"
+        """
+        super(BigBed, self).__init__()
+
+        self.track = None
+        self.input_bigbed_path = input_bigbed_path
+        self.name_bigbed = data_bigbed["name"]
+        self.priority = data_bigbed["order_index"]
+        self.track_color = data_bigbed["track_color"]
+        self.group_name = data_bigbed["group_name"]
+        # An empty long label falls back to the track name
+        self.long_label = data_bigbed["long_label"] or self.name_bigbed
+
+        track_name = "".join((self.name_bigbed, ".bigbed"))
+        bigbed_file_path = os.path.join(self.myTrackFolderPath, track_name)
+        track_type = self.determine_track_type(input_bigbed_path)
+        shutil.copy(self.input_bigbed_path, bigbed_file_path)
+
+        # Create the Track Object
+        self.createTrack(file_path=track_name,
+                         track_name=track_name,
+                         long_label=self.long_label,
+                         track_type=track_type,
+                         visibility='hide',
+                         priority=self.priority,
+                         track_file=bigbed_file_path,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        print("- BigBed %s created" % self.name_bigbed)
+
+    def determine_track_type(self, bb_file):
+        """
+        Determine the number of standard and extra fields using bigBedSummary
+
+        Implementation of reading from stdout is based on a Stackoverflow post:
+        http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate
+
+        :param bb_file: path to a bigBed file
+
+        :returns: the bigBed track type, or plain "bigBed" when no field
+            counts could be read from the bigBedSummary output
+        """
+        # Fallback so a missing summary line no longer raises UnboundLocalError
+        bed_type = "bigBed"
+        pattern = r"(\d+) bed definition fields, (\d+) total fields"
+
+        cmd_ph = Popen(["bigBedSummary", "-fields", bb_file, "stdout"],
+                       stdout=PIPE, bufsize=1)
+        with cmd_ph.stdout:
+            for line in iter(cmd_ph.stdout.readline, b''):
+                match = re.match(pattern, line)
+                if match:
+                    # "." when every field is a bed definition field, else "+"
+                    extra_mark = "." if match.group(1) == match.group(2) else "+"
+                    bed_type = "bigBed %s %s" % (match.group(1), extra_mark)
+                    break
+        cmd_ph.wait()
+
+        return bed_type
diff -r 2677f1899aa8 -r fcc1021bd496 BigWig.py
--- a/BigWig.py Tue May 09 15:42:43 2017 -0400
+++ b/BigWig.py Wed May 31 11:35:16 2017 -0400
@@ -2,6 +2,8 @@
import os
import shutil
+from subprocess import Popen, PIPE
+import re
# Internal dependencies
from Datatype import Datatype
@@ -36,30 +38,37 @@
self.createTrack(file_path=trackName,
track_name=trackName,
long_label=self.long_label,
- track_type='bigWig', visibility='full',
+ track_type=self.determine_track_type(myBigWigFilePath),
+ visibility='full',
priority=self.priority,
track_file=myBigWigFilePath,
track_color=self.track_color,
group_name=self.group_name)
- # dataURL = "tracks/%s" % trackName
- #
- # # Return the BigBed track
- #
- # trackDb = TrackDb(
- # trackName=trackName,
- # longLabel=self.name_bigwig,
- # shortLabel=self.getShortName( self.name_bigwig ),
- # trackDataURL=dataURL,
- # trackType='bigWig',
- # visibility='full',
- # priority=self.priority,
- # )
- #
- # self.track = Track(
- # trackFile=myBigWigFilePath,
- # trackDb=trackDb,
- # )
-
print("- BigWig %s created" % self.name_bigwig)
#print("- %s created in %s" % (trackName, myBigWigFilePath))
+
+    def determine_track_type(self, bw_file):
+        """
+        bigWig tracks must declare the expected signal range for the data
+        (See https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html).
+        This method determines the range of values for a bigWig file using
+        the bigWigInfo program.
+
+        Implementation of reading from stdout is based on a Stackoverflow post:
+        http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate
+
+        :param bw_file: path to a bigWig file
+
+        :returns: "bigWig " followed by the last line printed by bigWigInfo,
+            or plain "bigWig" when bigWigInfo printed nothing
+        """
+        # Fallback so empty output no longer raises UnboundLocalError
+        bw_type = "bigWig"
+        cmd_ph = Popen(["bigWigInfo", "-minMax", bw_file],
+                       stdout=PIPE, bufsize=1)
+        with cmd_ph.stdout:
+            for line in iter(cmd_ph.stdout.readline, b''):
+                bw_type = "bigWig %s" % line.rstrip()
+        cmd_ph.wait()
+        return bw_type
diff -r 2677f1899aa8 -r fcc1021bd496 BigWig.pyc
Binary file BigWig.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 Datatype.pyc
Binary file Datatype.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 Gff3.pyc
Binary file Gff3.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 Gtf.pyc
Binary file Gtf.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 Psl.pyc
Binary file Psl.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 bigPsl.pyc
Binary file bigPsl.pyc has changed
diff -r 2677f1899aa8 -r fcc1021bd496 cytoBand.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytoBand.py Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class cytoBand( Datatype ):
+    """Configuration for creating the cytoBandIdeo bigBed track."""
+
+    def __init__(self, input_bed_cytoBand_false_path, data_bed_cytoBand):
+        """
+        Sort the input bed file, convert it to bigBed using the
+        cytoBandIdeo.as autoSql definition and register the track.
+
+        :param input_bed_cytoBand_false_path: path to the input bed file
+        :param data_bed_cytoBand: dict read for the keys "name",
+            "order_index", "track_color", "group_name" and "long_label"
+        """
+
+        super(cytoBand, self).__init__()
+
+        self.input_bed_cytoBand_false_path = input_bed_cytoBand_false_path
+        self.name_bed_cytoBand = data_bed_cytoBand["name"]
+        self.priority = data_bed_cytoBand["order_index"]
+        self.track_color = data_bed_cytoBand["track_color"]
+        # TODO: Think about how to avoid repetition of the group_name everywhere
+        self.group_name = data_bed_cytoBand["group_name"]
+        # An empty long label falls back to the track name
+        if data_bed_cytoBand["long_label"]:
+            self.long_label = data_bed_cytoBand["long_label"]
+        else:
+            self.long_label = self.name_bed_cytoBand
+
+        # Named temporary file receiving the sorted copy of the input
+        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        # Sort processing
+        subtools.sort(self.input_bed_cytoBand_false_path, sortedBedFile.name)
+
+        # bedToBigBed processing
+        # TODO: Change the name of the bb, to tool + genome + .bb
+        trackName = "".join( ( self.name_bed_cytoBand, '.bb' ) )
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+
+        auto_sql_option = os.path.join(self.tool_directory, 'cytoBandIdeo.as')
+
+        # NOTE(review): cytoBandIdeo.as declares five fields; confirm that
+        # 'bed4' (rather than 'bed4+1') is what subtools.bedToBigBed expects
+        # for this autoSql definition.
+        with open(myBigBedFilePath, 'w') as bigBedFile:
+            subtools.bedToBigBed(sortedBedFile.name,
+                                 self.chromSizesFile.name,
+                                 bigBedFile.name,
+                                 typeOption='bed4',
+                                 autoSql=auto_sql_option)
+
+        # Create the Track Object
+        # The track itself is always named 'cytoBandIdeo'; the input name only
+        # determines the bigBed file name and the long-label fallback.
+        self.createTrack(file_path=trackName,
+                         track_name='cytoBandIdeo',
+                         long_label=self.long_label,
+                         track_type='bigBed',
+                         visibility='dense',
+                         priority=self.priority,
+                         track_file=myBigBedFilePath,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        print("- cytoBand %s created" % self.name_bed_cytoBand)
diff -r 2677f1899aa8 -r fcc1021bd496 cytoBandIdeo.as
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytoBandIdeo.as Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,9 @@
+table cytoBandIdeo
+"cytoBandIdeo Describes the positions of cytogenetic bands with a chromosome"
+ (
+ string chrom; "Reference sequence chromosome or scaffold"
+ uint chromStart; "Start position in chromosome"
+ uint chromEnd; "End position in chromosome"
+ string name; "Name of item"
+ string gieStain; "Giemsa stain results: gneg gpos25 gpos50 gpos75 gpos100 acen gvar stalk"
+ )
\ No newline at end of file
diff -r 2677f1899aa8 -r fcc1021bd496 hubArchiveCreator.py
--- a/hubArchiveCreator.py Tue May 09 15:42:43 2017 -0400
+++ b/hubArchiveCreator.py Wed May 31 11:35:16 2017 -0400
@@ -20,6 +20,7 @@
from BedSimpleRepeats import BedSimpleRepeats
from BedSpliceJunctions import BedSpliceJunctions
from Bed import Bed
+from cytoBand import cytoBand
from BigWig import BigWig
from util.Fasta import Fasta
from util.Filters import TraceBackFormatter
@@ -29,6 +30,7 @@
from TrackHub import TrackHub
from bigPsl import bigPsl
from BedBlastAlignments import BedBlastAlignments
+from BigBed import BigBed
# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
@@ -55,6 +57,9 @@
# Generic Bed (Blastx transformed to bed)
parser.add_argument('--bed', action='append', help='Bed generic format')
+ #cytoBandIdeo
+ parser.add_argument('--cytoBand', action='append', help='Cytoband Track, using cytoBandIdeo.as')
+
# BigPsl (blat alignment)
parser.add_argument('--bigpsl', action='append', help='bigPsl format, using bigPsl.as')
@@ -70,6 +75,9 @@
# Psl Management
parser.add_argument('--psl', action='append', help='Psl format')
+ # BigBed Management
+ parser.add_argument('--bigbed', action='append', help='BigBed format')
+
# TODO: Check if the running directory can have issues if we run the tool outside
parser.add_argument('-d', '--directory',
help='Running tool directory, where to find the templates. Default is running directory')
@@ -124,6 +132,7 @@
# EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH}
array_inputs_bam = args.bam
array_inputs_bed_generic = args.bed
+ array_inputs_bed_cytoBand = args.cytoBand
array_inputs_bed_simple_repeats = args.bedSimpleRepeats
array_inputs_bed_splice_junctions = args.bedSpliceJunctions
array_inputs_bigwig = args.bigwig
@@ -132,6 +141,7 @@
array_inputs_psl = args.psl
array_inputs_bigpsl = args.bigpsl
array_inputs_bed_blast_alignments = args.bedBlastAlignments
+ array_inputs_bigbed = args.bigbed
outputFile = args.output
@@ -152,6 +162,7 @@
for (inputs, datatype_class) in [
(array_inputs_bam, Bam),
(array_inputs_bed_generic, Bed),
+ (array_inputs_bed_cytoBand, cytoBand),
(array_inputs_bigwig, BigWig),
(array_inputs_bed_simple_repeats, BedSimpleRepeats),
(array_inputs_bed_splice_junctions, BedSpliceJunctions),
@@ -159,7 +170,8 @@
(array_inputs_gtf, Gtf),
(array_inputs_psl, Psl),
(array_inputs_bigpsl, bigPsl),
- (array_inputs_bed_blast_alignments, BedBlastAlignments)]:
+ (array_inputs_bed_blast_alignments, BedBlastAlignments),
+ (array_inputs_bigbed, BigBed)]:
if inputs:
all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
diff -r 2677f1899aa8 -r fcc1021bd496 hubArchiveCreator.xml
--- a/hubArchiveCreator.xml Tue May 09 15:42:43 2017 -0400
+++ b/hubArchiveCreator.xml Wed May 31 11:35:16 2017 -0400
@@ -5,8 +5,10 @@
- ucsc_tools_340
+ ucsc_hac
samtools
+ ucsc_bigwig
+ ucsc_bigbed
@@ -90,6 +92,11 @@
#silent $prepare_json($f.formatChoice.bedChoice.BED, $index_track_final,
extra_data_dict)
#end if
+ #if $f.formatChoice.bedChoice.bed_select == "bed_cytoBand"
+ --cytoBand $f.formatChoice.bedChoice.BED_cytoBand
+ #silent $prepare_json($f.formatChoice.bedChoice.BED_cytoBand, $index_track_final,
+ extra_data_dict)
+ #end if
#if $f.formatChoice.bedChoice.bed_select == "bed_simple_repeats_option"
--bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats
#silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final,
@@ -102,12 +109,12 @@
#end if
#if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option"
--bedBlastAlignments $f.formatChoice.bedChoice.BED_blast_alignment
- #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,
+ #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final,
extra_data_dict)
#end if
#if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option"
--bigpsl $f.formatChoice.bedChoice.BED_blat_alignment
- #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,
+ #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final,
extra_data_dict)
#end if
#end if
@@ -121,6 +128,11 @@
#silent $prepare_json($f.formatChoice.BIGWIG, $index_track_final,
extra_data_dict)
#end if
+ #if $f.formatChoice.format_select == "bigbed"
+ --bigbed $f.formatChoice.BIGBED
+ #silent $prepare_json($f.formatChoice.BIGBED, $index_track_final,
+ extra_data_dict)
+ #end if
#if $f.formatChoice.format_select == "gff3"
--gff3 $f.formatChoice.GFF3
#silent $prepare_json($f.formatChoice.GFF3, $index_track_final,
@@ -180,6 +192,7 @@
+
@@ -200,11 +213,12 @@
-
+
+
@@ -219,6 +233,14 @@
/>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -1029,4 +1067,8 @@
This Galaxy tool permits to prepare your files to be ready for
Assembly Hub visualization.
+
+
+ 10.7490/f1000research.1112719.1
+
diff -r 2677f1899aa8 -r fcc1021bd496 templates/trackDb/layout.txt
--- a/templates/trackDb/layout.txt Tue May 09 15:42:43 2017 -0400
+++ b/templates/trackDb/layout.txt Wed May 31 11:35:16 2017 -0400
@@ -1,14 +1,32 @@
% for trackDb in trackDbs:
- ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
- track ${trackDb.trackName}
- longLabel ${trackDb.longLabel}
- shortLabel ${trackDb.shortLabel}
- bigDataUrl ${trackDb.trackDataURL}
- type ${trackDb.trackType}
- visibility ${trackDb.visibility}
- thickDrawItem ${trackDb.thickDrawItem}
- priority ${trackDb.priority}
- color ${trackDb.track_color}
- group ${trackDb.group_name.lower().replace(' ', '_')}
+ % if "bigWig" in trackDb.trackType:
+
+track ${trackDb.trackName}
+longLabel ${trackDb.longLabel}
+shortLabel ${trackDb.shortLabel}
+bigDataUrl ${trackDb.trackDataURL}
+visibility ${trackDb.visibility}
+priority ${trackDb.priority}
+color ${trackDb.track_color}
+group ${trackDb.group_name.lower().replace(' ', '_')}
+type ${trackDb.trackType}
+autoScale on
+maxHeightPixels 100:32:8
+windowingFunction mean+whiskers
+ % else:
+
+## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
+track ${trackDb.trackName}
+longLabel ${trackDb.longLabel}
+shortLabel ${trackDb.shortLabel}
+bigDataUrl ${trackDb.trackDataURL}
+type ${trackDb.trackType}
+visibility ${trackDb.visibility}
+thickDrawItem ${trackDb.thickDrawItem}
+priority ${trackDb.priority}
+color ${trackDb.track_color}
+group ${trackDb.group_name.lower().replace(' ', '_')}
+
+ % endif
% endfor
diff -r 2677f1899aa8 -r fcc1021bd496 tool_dependencies.xml
--- a/tool_dependencies.xml Tue May 09 15:42:43 2017 -0400
+++ b/tool_dependencies.xml Wed May 31 11:35:16 2017 -0400
@@ -1,6 +1,5 @@
-