# HG changeset patch # User yating-l # Date 1496244916 14400 # Node ID fcc1021bd49607d3bc219b19636ee0f4e15ec7b0 # Parent 2677f1899aa861849c1a210e1474e01c41d14597 planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 93e2e2fb59f99677425104a80c17f665fa7b2b4a-dirty diff -r 2677f1899aa8 -r fcc1021bd496 Bam.pyc Binary file Bam.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 Bed.pyc Binary file Bed.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 BedBlastAlignments.pyc Binary file BedBlastAlignments.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 BedSimpleRepeats.pyc Binary file BedSimpleRepeats.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 BedSpliceJunctions.pyc Binary file BedSpliceJunctions.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 BigBed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BigBed.py Wed May 31 11:35:16 2017 -0400 @@ -0,0 +1,79 @@ +#!/usr/bin/python + +import os +import shutil +from subprocess import Popen, PIPE +import re + +# Internal dependencies +from Datatype import Datatype + +class BigBed(Datatype): + """ Configurations for creating the bigBed evidence track """ + + def __init__(self, input_bigbed_path, data_bigbed): + super(BigBed, self).__init__() + + self.track = None + + self.input_bigbed_path = input_bigbed_path + self.name_bigbed = data_bigbed["name"] + self.priority = data_bigbed["order_index"] + self.track_color = data_bigbed["track_color"] + self.group_name = data_bigbed["group_name"] + + track_name = "".join((self.name_bigbed, ".bigbed")) + if data_bigbed["long_label"]: + self.long_label = data_bigbed["long_label"] + else: + self.long_label = self.name_bigbed + + bigbed_file_path = os.path.join(self.myTrackFolderPath, track_name) + + track_type = self.determine_track_type(input_bigbed_path) + + shutil.copy(self.input_bigbed_path, bigbed_file_path) + + # Create the Track Object + self.createTrack(file_path=track_name, + track_name=track_name, + long_label=self.long_label, + track_type=track_type, + visibility='hide', + priority=self.priority, + track_file=bigbed_file_path, + track_color=self.track_color, + group_name=self.group_name) + + print "- BigBed %s created" % self.name_bigbed + + + def determine_track_type(self, bb_file): + """ + Determine the number of standard and extra fields using bigBedSummary + + Implementation of reading from stdout is based on a Stackoverflow post: + http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate + + :param bb_file: path to a bigBed file + + :returns: the bigBed track type + """ + + cmd_ph = Popen(["bigBedSummary", "-fields", bb_file, "stdout"], + stdout=PIPE, bufsize=1) + + pattern = r"(\d+) bed definition fields, (\d+) total fields" + + with cmd_ph.stdout: + for line in iter(cmd_ph.stdout.readline, b''): + match = re.match(pattern, line) + + if match: + extra_mark = "." if match.group(1) == match.group(2) else "+" + bed_type = "bigBed %s %s" % (match.group(1), extra_mark) + break + + cmd_ph.wait() + + return bed_type diff -r 2677f1899aa8 -r fcc1021bd496 BigWig.py --- a/BigWig.py Tue May 09 15:42:43 2017 -0400 +++ b/BigWig.py Wed May 31 11:35:16 2017 -0400 @@ -2,6 +2,8 @@ import os import shutil +from subprocess import Popen, PIPE +import re # Internal dependencies from Datatype import Datatype @@ -36,30 +38,37 @@ self.createTrack(file_path=trackName, track_name=trackName, long_label=self.long_label, - track_type='bigWig', visibility='full', + track_type=self.determine_track_type(myBigWigFilePath), + visibility='full', priority=self.priority, track_file=myBigWigFilePath, track_color=self.track_color, group_name=self.group_name) - # dataURL = "tracks/%s" % trackName - # - # # Return the BigBed track - # - # trackDb = TrackDb( - # trackName=trackName, - # longLabel=self.name_bigwig, - # shortLabel=self.getShortName( self.name_bigwig ), - # trackDataURL=dataURL, - # trackType='bigWig', - # visibility='full', - # priority=self.priority, - # ) - # - # self.track = Track( - # trackFile=myBigWigFilePath, - # trackDb=trackDb, - # ) - print("- BigWig %s created" % self.name_bigwig) #print("- %s created in %s" % (trackName, myBigWigFilePath)) + + def determine_track_type(self, bw_file): + """ + bigWig tracks must declare the expected signal range for the data + (See https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html). + This method determines the range of values for a bigWig file using + the bigWigInfo program. + + Implementation of reading from stdout is based on a Stackoverflow post: + http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate + + :param bw_file: path to a bigWig file + + :returns: the bigWig track type + """ + cmd_ph = Popen(["bigWigInfo", "-minMax", bw_file], + stdout=PIPE, bufsize=1) + + with cmd_ph.stdout: + for line in iter(cmd_ph.stdout.readline, b''): + bw_type = "bigWig %s" % line.rstrip() + + cmd_ph.wait() + + return bw_type diff -r 2677f1899aa8 -r fcc1021bd496 BigWig.pyc Binary file BigWig.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 Datatype.pyc Binary file Datatype.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 Gff3.pyc Binary file Gff3.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 Gtf.pyc Binary file Gtf.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 Psl.pyc Binary file Psl.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 bigPsl.pyc Binary file bigPsl.pyc has changed diff -r 2677f1899aa8 -r fcc1021bd496 cytoBand.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytoBand.py Wed May 31 11:35:16 2017 -0400 @@ -0,0 +1,75 @@ +#!/usr/bin/python + +import os +import tempfile + +from Datatype import Datatype +from Track import Track +from TrackDb import TrackDb +from util import subtools + + +class cytoBand( Datatype ): + def __init__(self, input_bed_cytoBand_false_path, data_bed_cytoBand): + + super(cytoBand, self).__init__() + + self.input_bed_cytoBand_false_path = input_bed_cytoBand_false_path + self.name_bed_cytoBand = data_bed_cytoBand["name"] + self.priority = data_bed_cytoBand["order_index"] + self.track_color = data_bed_cytoBand["track_color"] + # TODO: Think about how to avoid repetition of the group_name everywhere + self.group_name = data_bed_cytoBand["group_name"] + if data_bed_cytoBand["long_label"]: + self.long_label = data_bed_cytoBand["long_label"] + else: + self.long_label = self.name_bed_cytoBand + sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") + + # Sort processing + subtools.sort(self.input_bed_cytoBand_false_path, sortedBedFile.name) + + # bedToBigBed processing + # TODO: Change the name of the bb, to tool + genome + .bb + trackName = "".join( ( self.name_bed_cytoBand, '.bb' ) ) + myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) + + auto_sql_option = os.path.join(self.tool_directory, 'cytoBandIdeo.as') + + with open(myBigBedFilePath, 'w') as bigBedFile: + subtools.bedToBigBed(sortedBedFile.name, + self.chromSizesFile.name, + bigBedFile.name, + typeOption='bed4', + autoSql=auto_sql_option) + + # Create the Track Object + self.createTrack(file_path=trackName, + track_name='cytoBandIdeo', + long_label=self.long_label, + track_type='bigBed', + visibility='dense', + priority=self.priority, + track_file=myBigBedFilePath, + track_color=self.track_color, + group_name=self.group_name) + + # dataURL = "tracks/%s" % trackName + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_bed_simple_repeats, + # shortLabel=self.getShortName( self.name_bed_simple_repeats ), + # trackDataURL=dataURL, + # trackType='bigBed 4 +', + # visibility='dense', + # priority=self.priority, + # ) + # + # self.track = Track( + # trackFile=myBigBedFilePath, + # trackDb=trackDb, + # ) + + print("- Bed splice junctions %s created" % self.name_bed_cytoBand) + #print("- %s created in %s" % (trackName, myBigBedFilePath)) diff -r 2677f1899aa8 -r fcc1021bd496 cytoBandIdeo.as --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytoBandIdeo.as Wed May 31 11:35:16 2017 -0400 @@ -0,0 +1,9 @@ +table cytoBandIdeo +"cytoBandIdeo Describes the positions of cytogenetic bands with a chromosome" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name of item" + string gieStain; "Giemsa stain results: gneg gpos25 gpos50 gpos75 gpos100 acen gvar stalk" + ) \ No newline at end of file diff -r 2677f1899aa8 -r fcc1021bd496 hubArchiveCreator.py --- a/hubArchiveCreator.py Tue May 09 15:42:43 2017 -0400 +++ b/hubArchiveCreator.py Wed May 31 11:35:16 2017 -0400 @@ -20,6 +20,7 @@ from BedSimpleRepeats import BedSimpleRepeats from BedSpliceJunctions import BedSpliceJunctions from Bed import Bed +from cytoBand import cytoBand from BigWig import BigWig from util.Fasta import Fasta from util.Filters import TraceBackFormatter @@ -29,6 +30,7 @@ from TrackHub import TrackHub from bigPsl import bigPsl from BedBlastAlignments import BedBlastAlignments +from BigBed import BigBed # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -55,6 +57,9 @@ # Generic Bed (Blastx transformed to bed) parser.add_argument('--bed', action='append', help='Bed generic format') + #cytoBandIdeo + parser.add_argument('--cytoBand', action='append', help='Cytoband Track, using cytoBandIdeo.as') + # BigPsl (blat alignment) parser.add_argument('--bigpsl', action='append', help='bigPsl format, using bigPsl.as') @@ -70,6 +75,9 @@ # Psl Management parser.add_argument('--psl', action='append', help='Psl format') + # BigBed Management + parser.add_argument('--bigbed', action='append', help='BigBed format') + # TODO: Check if the running directory can have issues if we run the tool outside parser.add_argument('-d', '--directory', help='Running tool directory, where to find the templates. Default is running directory') @@ -124,6 +132,7 @@ # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH} array_inputs_bam = args.bam array_inputs_bed_generic = args.bed + array_inputs_bed_cytoBand = args.cytoBand array_inputs_bed_simple_repeats = args.bedSimpleRepeats array_inputs_bed_splice_junctions = args.bedSpliceJunctions array_inputs_bigwig = args.bigwig @@ -132,6 +141,7 @@ array_inputs_psl = args.psl array_inputs_bigpsl = args.bigpsl array_inputs_bed_blast_alignments = args.bedBlastAlignments + array_inputs_bigbed = args.bigbed outputFile = args.output @@ -152,6 +162,7 @@ for (inputs, datatype_class) in [ (array_inputs_bam, Bam), (array_inputs_bed_generic, Bed), + (array_inputs_bed_cytoBand, cytoBand), (array_inputs_bigwig, BigWig), (array_inputs_bed_simple_repeats, BedSimpleRepeats), (array_inputs_bed_splice_junctions, BedSpliceJunctions), @@ -159,7 +170,8 @@ (array_inputs_gtf, Gtf), (array_inputs_psl, Psl), (array_inputs_bigpsl, bigPsl), - (array_inputs_bed_blast_alignments, BedBlastAlignments)]: + (array_inputs_bed_blast_alignments, BedBlastAlignments), + (array_inputs_bigbed, BigBed)]: if inputs: all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) diff -r 2677f1899aa8 -r fcc1021bd496 hubArchiveCreator.xml --- a/hubArchiveCreator.xml Tue May 09 15:42:43 2017 -0400 +++ b/hubArchiveCreator.xml Wed May 31 11:35:16 2017 -0400 @@ -5,8 +5,10 @@ - ucsc_tools_340 + ucsc_hac samtools + ucsc_bigwig + ucsc_bigbed @@ -90,6 +92,11 @@ #silent $prepare_json($f.formatChoice.bedChoice.BED, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.bedChoice.bed_select == "bed_cytoBand" + --cytoBand $f.formatChoice.bedChoice.BED_cytoBand + #silent $prepare_json($f.formatChoice.bedChoice.BED_cytoBand, $index_track_final, + extra_data_dict) + #end if #if $f.formatChoice.bedChoice.bed_select == "bed_simple_repeats_option" --bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final, @@ -102,12 +109,12 @@ #end if #if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option" --bedBlastAlignments $f.formatChoice.bedChoice.BED_blast_alignment - #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, + #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, extra_data_dict) #end if #if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option" --bigpsl $f.formatChoice.bedChoice.BED_blat_alignment - #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, + #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, extra_data_dict) #end if #end if @@ -121,6 +128,11 @@ #silent $prepare_json($f.formatChoice.BIGWIG, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.format_select == "bigbed" + --bigbed $f.formatChoice.BIGBED + #silent $prepare_json($f.formatChoice.BIGBED, $index_track_final, + extra_data_dict) + #end if #if $f.formatChoice.format_select == "gff3" --gff3 $f.formatChoice.GFF3 #silent $prepare_json($f.formatChoice.GFF3, $index_track_final, @@ -180,6 +192,7 @@ + @@ -200,11 +213,12 @@ - + + @@ -219,6 +233,14 @@ /> + + + + + + + + + + + + + + - + @@ -1029,4 +1067,8 @@ This Galaxy tool permits to prepare your files to be ready for Assembly Hub visualization. + + + 10.7490/f1000research.1112719.1 + diff -r 2677f1899aa8 -r fcc1021bd496 templates/trackDb/layout.txt --- a/templates/trackDb/layout.txt Tue May 09 15:42:43 2017 -0400 +++ b/templates/trackDb/layout.txt Wed May 31 11:35:16 2017 -0400 @@ -1,14 +1,32 @@ % for trackDb in trackDbs: - ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html - track ${trackDb.trackName} - longLabel ${trackDb.longLabel} - shortLabel ${trackDb.shortLabel} - bigDataUrl ${trackDb.trackDataURL} - type ${trackDb.trackType} - visibility ${trackDb.visibility} - thickDrawItem ${trackDb.thickDrawItem} - priority ${trackDb.priority} - color ${trackDb.track_color} - group ${trackDb.group_name.lower().replace(' ', '_')} + % if "bigWig" in trackDb.trackType: + +track ${trackDb.trackName} +longLabel ${trackDb.longLabel} +shortLabel ${trackDb.shortLabel} +bigDataUrl ${trackDb.trackDataURL} +visibility ${trackDb.visibility} +priority ${trackDb.priority} +color ${trackDb.track_color} +group ${trackDb.group_name.lower().replace(' ', '_')} +type ${trackDb.trackType} +autoScale on +maxHeightPixels 100:32:8 +windowingFunction mean+whiskers + % else: + +## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html +track ${trackDb.trackName} +longLabel ${trackDb.longLabel} +shortLabel ${trackDb.shortLabel} +bigDataUrl ${trackDb.trackDataURL} +type ${trackDb.trackType} +visibility ${trackDb.visibility} +thickDrawItem ${trackDb.thickDrawItem} +priority ${trackDb.priority} +color ${trackDb.track_color} +group ${trackDb.group_name.lower().replace(' ', '_')} + + % endif % endfor diff -r 2677f1899aa8 -r fcc1021bd496 tool_dependencies.xml --- a/tool_dependencies.xml Tue May 09 15:42:43 2017 -0400 +++ b/tool_dependencies.xml Wed May 31 11:35:16 2017 -0400 @@ -1,6 +1,5 @@ -