Mercurial > repos > rmarenco > hubarchivecreator
changeset 24:fcc1021bd496 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 93e2e2fb59f99677425104a80c17f665fa7b2b4a-dirty
author | yating-l |
---|---|
date | Wed, 31 May 2017 11:35:16 -0400 |
parents | 2677f1899aa8 |
children | 99dad5f9444c |
files | Bam.pyc Bed.pyc BedBlastAlignments.pyc BedSimpleRepeats.pyc BedSpliceJunctions.pyc BigBed.py BigWig.py BigWig.pyc Datatype.pyc Gff3.pyc Gtf.pyc Psl.pyc bigPsl.pyc cytoBand.py cytoBandIdeo.as hubArchiveCreator.py hubArchiveCreator.xml templates/trackDb/layout.txt tool_dependencies.xml util/subtools.py |
diffstat | 20 files changed, 293 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BigBed.py Wed May 31 11:35:16 2017 -0400 @@ -0,0 +1,79 @@ +#!/usr/bin/python + +import os +import shutil +from subprocess import Popen, PIPE +import re + +# Internal dependencies +from Datatype import Datatype + +class BigBed(Datatype): + """ Configurations for creating the bigBed evidence track """ + + def __init__(self, input_bigbed_path, data_bigbed): + super(BigBed, self).__init__() + + self.track = None + + self.input_bigbed_path = input_bigbed_path + self.name_bigbed = data_bigbed["name"] + self.priority = data_bigbed["order_index"] + self.track_color = data_bigbed["track_color"] + self.group_name = data_bigbed["group_name"] + + track_name = "".join((self.name_bigbed, ".bigbed")) + if data_bigbed["long_label"]: + self.long_label = data_bigbed["long_label"] + else: + self.long_label = self.name_bigbed + + bigbed_file_path = os.path.join(self.myTrackFolderPath, track_name) + + track_type = self.determine_track_type(input_bigbed_path) + + shutil.copy(self.input_bigbed_path, bigbed_file_path) + + # Create the Track Object + self.createTrack(file_path=track_name, + track_name=track_name, + long_label=self.long_label, + track_type=track_type, + visibility='hide', + priority=self.priority, + track_file=bigbed_file_path, + track_color=self.track_color, + group_name=self.group_name) + + print "- BigBed %s created" % self.name_bigbed + + + def determine_track_type(self, bb_file): + """ + Determine the number of standard and extra fields using bigBedSummary + + Implementation of reading from stdout is based on a Stackoverflow post: + http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate + + :param bb_file: path to a bigBed file + + :returns: the bigBed track type + """ + + cmd_ph = Popen(["bigBedSummary", "-fields", bb_file, "stdout"], + stdout=PIPE, bufsize=1) + + pattern = r"(\d+) bed definition fields, (\d+) total fields" + + with cmd_ph.stdout: + for line in 
iter(cmd_ph.stdout.readline, b''): + match = re.match(pattern, line) + + if match: + extra_mark = "." if match.group(1) == match.group(2) else "+" + bed_type = "bigBed %s %s" % (match.group(1), extra_mark) + break + + cmd_ph.wait() + + return bed_type
--- a/BigWig.py Tue May 09 15:42:43 2017 -0400 +++ b/BigWig.py Wed May 31 11:35:16 2017 -0400 @@ -2,6 +2,8 @@ import os import shutil +from subprocess import Popen, PIPE +import re # Internal dependencies from Datatype import Datatype @@ -36,30 +38,37 @@ self.createTrack(file_path=trackName, track_name=trackName, long_label=self.long_label, - track_type='bigWig', visibility='full', + track_type=self.determine_track_type(myBigWigFilePath), + visibility='full', priority=self.priority, track_file=myBigWigFilePath, track_color=self.track_color, group_name=self.group_name) - # dataURL = "tracks/%s" % trackName - # - # # Return the BigBed track - # - # trackDb = TrackDb( - # trackName=trackName, - # longLabel=self.name_bigwig, - # shortLabel=self.getShortName( self.name_bigwig ), - # trackDataURL=dataURL, - # trackType='bigWig', - # visibility='full', - # priority=self.priority, - # ) - # - # self.track = Track( - # trackFile=myBigWigFilePath, - # trackDb=trackDb, - # ) - print("- BigWig %s created" % self.name_bigwig) #print("- %s created in %s" % (trackName, myBigWigFilePath)) + + def determine_track_type(self, bw_file): + """ + bigWig tracks must declare the expected signal range for the data + (See https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html). + This method determines the range of values for a bigWig file using + the bigWigInfo program. + + Implementation of reading from stdout is based on a Stackoverflow post: + http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate + + :param bw_file: path to a bigWig file + + :returns: the bigWig track type + """ + cmd_ph = Popen(["bigWigInfo", "-minMax", bw_file], + stdout=PIPE, bufsize=1) + + with cmd_ph.stdout: + for line in iter(cmd_ph.stdout.readline, b''): + bw_type = "bigWig %s" % line.rstrip() + + cmd_ph.wait() + + return bw_type
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cytoBand.py Wed May 31 11:35:16 2017 -0400 @@ -0,0 +1,75 @@ +#!/usr/bin/python + +import os +import tempfile + +from Datatype import Datatype +from Track import Track +from TrackDb import TrackDb +from util import subtools + + +class cytoBand( Datatype ): + def __init__(self, input_bed_cytoBand_false_path, data_bed_cytoBand): + + super(cytoBand, self).__init__() + + self.input_bed_cytoBand_false_path = input_bed_cytoBand_false_path + self.name_bed_cytoBand = data_bed_cytoBand["name"] + self.priority = data_bed_cytoBand["order_index"] + self.track_color = data_bed_cytoBand["track_color"] + # TODO: Think about how to avoid repetition of the group_name everywhere + self.group_name = data_bed_cytoBand["group_name"] + if data_bed_cytoBand["long_label"]: + self.long_label = data_bed_cytoBand["long_label"] + else: + self.long_label = self.name_bed_cytoBand + sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") + + # Sort processing + subtools.sort(self.input_bed_cytoBand_false_path, sortedBedFile.name) + + # bedToBigBed processing + # TODO: Change the name of the bb, to tool + genome + .bb + trackName = "".join( ( self.name_bed_cytoBand, '.bb' ) ) + myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) + + auto_sql_option = os.path.join(self.tool_directory, 'cytoBandIdeo.as') + + with open(myBigBedFilePath, 'w') as bigBedFile: + subtools.bedToBigBed(sortedBedFile.name, + self.chromSizesFile.name, + bigBedFile.name, + typeOption='bed4', + autoSql=auto_sql_option) + + # Create the Track Object + self.createTrack(file_path=trackName, + track_name='cytoBandIdeo', + long_label=self.long_label, + track_type='bigBed', + visibility='dense', + priority=self.priority, + track_file=myBigBedFilePath, + track_color=self.track_color, + group_name=self.group_name) + + # dataURL = "tracks/%s" % trackName + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_bed_simple_repeats, + # 
shortLabel=self.getShortName( self.name_bed_simple_repeats ), + # trackDataURL=dataURL, + # trackType='bigBed 4 +', + # visibility='dense', + # priority=self.priority, + # ) + # + # self.track = Track( + # trackFile=myBigBedFilePath, + # trackDb=trackDb, + # ) + + print("- Bed splice junctions %s created" % self.name_bed_cytoBand) + #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cytoBandIdeo.as Wed May 31 11:35:16 2017 -0400
@@ -0,0 +1,9 @@
+table cytoBandIdeo
+"cytoBandIdeo Describes the positions of cytogenetic bands with a chromosome"
+    (
+    string chrom; "Reference sequence chromosome or scaffold"
+    uint chromStart; "Start position in chromosome"
+    uint chromEnd; "End position in chromosome"
+    string name; "Name of item"
+    string gieStain; "Giemsa stain results: gneg gpos25 gpos50 gpos75 gpos100 acen gvar stalk"
+    )
\ No newline at end of file
--- a/hubArchiveCreator.py Tue May 09 15:42:43 2017 -0400 +++ b/hubArchiveCreator.py Wed May 31 11:35:16 2017 -0400 @@ -20,6 +20,7 @@ from BedSimpleRepeats import BedSimpleRepeats from BedSpliceJunctions import BedSpliceJunctions from Bed import Bed +from cytoBand import cytoBand from BigWig import BigWig from util.Fasta import Fasta from util.Filters import TraceBackFormatter @@ -29,6 +30,7 @@ from TrackHub import TrackHub from bigPsl import bigPsl from BedBlastAlignments import BedBlastAlignments +from BigBed import BigBed # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -55,6 +57,9 @@ # Generic Bed (Blastx transformed to bed) parser.add_argument('--bed', action='append', help='Bed generic format') + #cytoBandIdeo + parser.add_argument('--cytoBand', action='append', help='Cytoband Track, using cytoBandIdeo.as') + # BigPsl (blat alignment) parser.add_argument('--bigpsl', action='append', help='bigPsl format, using bigPsl.as') @@ -70,6 +75,9 @@ # Psl Management parser.add_argument('--psl', action='append', help='Psl format') + # BigBed Management + parser.add_argument('--bigbed', action='append', help='BigBed format') + # TODO: Check if the running directory can have issues if we run the tool outside parser.add_argument('-d', '--directory', help='Running tool directory, where to find the templates. 
Default is running directory') @@ -124,6 +132,7 @@ # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH} array_inputs_bam = args.bam array_inputs_bed_generic = args.bed + array_inputs_bed_cytoBand = args.cytoBand array_inputs_bed_simple_repeats = args.bedSimpleRepeats array_inputs_bed_splice_junctions = args.bedSpliceJunctions array_inputs_bigwig = args.bigwig @@ -132,6 +141,7 @@ array_inputs_psl = args.psl array_inputs_bigpsl = args.bigpsl array_inputs_bed_blast_alignments = args.bedBlastAlignments + array_inputs_bigbed = args.bigbed outputFile = args.output @@ -152,6 +162,7 @@ for (inputs, datatype_class) in [ (array_inputs_bam, Bam), (array_inputs_bed_generic, Bed), + (array_inputs_bed_cytoBand, cytoBand), (array_inputs_bigwig, BigWig), (array_inputs_bed_simple_repeats, BedSimpleRepeats), (array_inputs_bed_splice_junctions, BedSpliceJunctions), @@ -159,7 +170,8 @@ (array_inputs_gtf, Gtf), (array_inputs_psl, Psl), (array_inputs_bigpsl, bigPsl), - (array_inputs_bed_blast_alignments, BedBlastAlignments)]: + (array_inputs_bed_blast_alignments, BedBlastAlignments), + (array_inputs_bigbed, BigBed)]: if inputs: all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
--- a/hubArchiveCreator.xml Tue May 09 15:42:43 2017 -0400 +++ b/hubArchiveCreator.xml Wed May 31 11:35:16 2017 -0400 @@ -5,8 +5,10 @@ </description> <requirements> - <requirement type="package" version="1.0">ucsc_tools_340</requirement> + <requirement type="package" version="340">ucsc_hac</requirement> <requirement type="package" version="1.2">samtools</requirement> + <requirement type="package" version="340">ucsc_bigwig</requirement> + <requirement type="package" version="340">ucsc_bigbed</requirement> </requirements> <stdio> @@ -90,6 +92,11 @@ #silent $prepare_json($f.formatChoice.bedChoice.BED, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.bedChoice.bed_select == "bed_cytoBand" + --cytoBand $f.formatChoice.bedChoice.BED_cytoBand + #silent $prepare_json($f.formatChoice.bedChoice.BED_cytoBand, $index_track_final, + extra_data_dict) + #end if #if $f.formatChoice.bedChoice.bed_select == "bed_simple_repeats_option" --bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final, @@ -102,12 +109,12 @@ #end if #if $f.formatChoice.bedChoice.bed_select == "bed_blast_alignment_option" --bedBlastAlignments $f.formatChoice.bedChoice.BED_blast_alignment - #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, + #silent $prepare_json($f.formatChoice.bedChoice.BED_blast_alignment, $index_track_final, extra_data_dict) #end if #if $f.formatChoice.bedChoice.bed_select == "bed_blat_alignment_option" --bigpsl $f.formatChoice.bedChoice.BED_blat_alignment - #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, + #silent $prepare_json($f.formatChoice.bedChoice.BED_blat_alignment, $index_track_final, extra_data_dict) #end if #end if @@ -121,6 +128,11 @@ #silent $prepare_json($f.formatChoice.BIGWIG, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.format_select == "bigbed" + --bigbed 
$f.formatChoice.BIGBED + #silent $prepare_json($f.formatChoice.BIGBED, $index_track_final, + extra_data_dict) + #end if #if $f.formatChoice.format_select == "gff3" --gff3 $f.formatChoice.GFF3 #silent $prepare_json($f.formatChoice.GFF3, $index_track_final, @@ -180,6 +192,7 @@ <option value="bed">BED</option> <option value="psl">PSL</option> <option value="bigwig">BIGWIG</option> + <option value="bigbed">BIGBED</option> <option value="gff3">GFF3</option> <option value="gtf">GTF</option> </param> @@ -200,11 +213,12 @@ </valid> </sanitizer> </param> - </when> + </when> <when value="bed"> <conditional name="bedChoice"> <param name="bed_select" type="select" label="Bed Choice"> <option value="bed_generic" selected="true">BED Generic (bed3+)</option> + <option value="bed_cytoBand">cytoBand Track (bed4)</option> <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option> <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option> <option value="bed_blast_alignment_option">Blast alignments (bed12+12 / bigPsl.as)</option> @@ -219,6 +233,14 @@ /> </when> + <when value="bed_cytoBand"> + <param + format="bed" + name="BED_cytoBand" + type="data" + label="cytoBand Track (Bed4)" + /> + </when> <when value="bed_simple_repeats_option"> <param format="bed" @@ -293,6 +315,22 @@ </sanitizer> </param> </when> + <when value="bigbed"> + <param + format="bigbed" + name="BIGBED" + type="data" + label="BIGBED File" + /> + <param name="longLabel" type="text" size="30" label="Track name" /> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> + </when> <when value="gff3"> <param format="gff3" @@ -474,7 +512,7 @@ /> </output> </test> - + <!-- Test with Psl --> <test> @@ -1029,4 +1067,8 @@ This Galaxy tool permits to prepare your files to be ready for Assembly Hub 
visualization. </help> + + <citations> + <citation type="doi">10.7490/f1000research.1112719.1</citation> + </citations> </tool>
--- a/templates/trackDb/layout.txt Tue May 09 15:42:43 2017 -0400 +++ b/templates/trackDb/layout.txt Wed May 31 11:35:16 2017 -0400 @@ -1,14 +1,32 @@ % for trackDb in trackDbs: - ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html - track ${trackDb.trackName} - longLabel ${trackDb.longLabel} - shortLabel ${trackDb.shortLabel} - bigDataUrl ${trackDb.trackDataURL} - type ${trackDb.trackType} - visibility ${trackDb.visibility} - thickDrawItem ${trackDb.thickDrawItem} - priority ${trackDb.priority} - color ${trackDb.track_color} - group ${trackDb.group_name.lower().replace(' ', '_')} + % if "bigWig" in trackDb.trackType: + +track ${trackDb.trackName} +longLabel ${trackDb.longLabel} +shortLabel ${trackDb.shortLabel} +bigDataUrl ${trackDb.trackDataURL} +visibility ${trackDb.visibility} +priority ${trackDb.priority} +color ${trackDb.track_color} +group ${trackDb.group_name.lower().replace(' ', '_')} +type ${trackDb.trackType} +autoScale on +maxHeightPixels 100:32:8 +windowingFunction mean+whiskers + % else: + +## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html +track ${trackDb.trackName} +longLabel ${trackDb.longLabel} +shortLabel ${trackDb.shortLabel} +bigDataUrl ${trackDb.trackDataURL} +type ${trackDb.trackType} +visibility ${trackDb.visibility} +thickDrawItem ${trackDb.thickDrawItem} +priority ${trackDb.priority} +color ${trackDb.track_color} +group ${trackDb.group_name.lower().replace(' ', '_')} + + % endif % endfor
--- a/tool_dependencies.xml Tue May 09 15:42:43 2017 -0400 +++ b/tool_dependencies.xml Wed May 31 11:35:16 2017 -0400 @@ -1,6 +1,5 @@ <?xml version="1.0"?> <tool_dependency> - <!-- UCSC Tools in --> <!-- Useful for HAC are: - twoBitInfo - sort @@ -16,23 +15,15 @@ <package name="samtools" version="1.2"> <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - - <package name="ucsc_tools_340" version="1.0"> - <install version="1.0"> - <actions_group> - <actions architecture="x86_64" os="linux"> - <action type="download_by_url">http://old-gep.wustl.edu/~galaxy/ucsc_tools_340.tar.gz</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR/bin</destination_directory> - </action> - </actions> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions_group> - </install> - <readme>The well known UCSC tools from Jim Kent.</readme> + <package name="ucsc_hac" version="340"> + <repository changeset_revision="a819439b6d76" name="package_ucsc_hac_340" owner="yating-l" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="ucsc_bigbed" version="340"> + <repository changeset_revision="b23d341df89d" name="package_ucsc_bigbed_340" owner="yating-l" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - + + <package name="ucsc_bigwig" version="340"> + <repository changeset_revision="b1fb46f92da0" name="package_ucsc_bigwig_340" owner="yating-l" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + </tool_dependency>
--- a/util/subtools.py Tue May 09 15:42:43 2017 -0400
+++ b/util/subtools.py Wed May 31 11:35:16 2017 -0400
@@ -250,6 +250,8 @@
 # See the "track" Common settings at:
 #https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments
 def fixName(filename):
+    if filename == 'cytoBandIdeo':
+        return filename
     valid_chars = "_%s%s" % (string.ascii_letters, string.digits)
     sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename])
     sanitize_name = "gonramp_" + sanitize_name