Mercurial > repos > rmarenco > hubarchivecreator
changeset 16:3233451a3bd6 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit fc73ec22a0db3ab09c4ac13dc58f0b54ae37845c
author | rmarenco |
---|---|
date | Sun, 25 Sep 2016 11:25:38 -0400 |
parents | 2a45cd656e8e |
children | c02720d1afee |
files | Bam.py Bam.pyc Bed.py Bed.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc Psl.py Psl.pyc TrackDb.py TrackDb.pyc TrackHub.py TrackHub.pyc bigPsl.as hubArchiveCreator.py hubArchiveCreator.xml templates/display.txt templates/trackDb/layout.txt todo.md trackHub/tracks_partial.py util/subtools.py util/subtools.pyc |
diffstat | 29 files changed, 374 insertions(+), 77 deletions(-) [+] |
line wrap: on
line diff
--- a/Bam.py Fri Sep 02 15:41:51 2016 -0400 +++ b/Bam.py Sun Sep 25 11:25:38 2016 -0400 @@ -5,6 +5,7 @@ Class to handle Bam files to UCSC TrackHub """ +import logging import os import shutil @@ -31,6 +32,10 @@ self.priority = self.data_bam["order_index"] self.index_bam = self.data_bam["index"] + # TODO: Think about how to avoir repetition of the color treatment + self.track_color = self.data_bam["track_color"] + + logging.debug("Color of bam: {0}".format(self.track_color)) #print "Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam) @@ -49,7 +54,8 @@ self.createTrack(file_path=self.name_bam, track_name=self.name_bam, long_label=self.name_bam, track_type='bam', visibility='pack', priority=self.priority, - track_file=bam_index_file_path) + track_file=bam_index_file_path, + track_color=self.track_color) # # dataURL = "tracks/%s" % self.name_bam #
--- a/Bed.py Fri Sep 02 15:41:51 2016 -0400 +++ b/Bed.py Sun Sep 25 11:25:38 2016 -0400 @@ -23,6 +23,7 @@ self.data_bed_generic = data_bed_generic self.name_bed_generic = self.data_bed_generic["name"] self.priority = self.data_bed_generic["order_index"] + self.track_color = self.data_bed_generic["track_color"] # Sort processing subtools.sort(self.inputBedGeneric, self.sortedBedFile.name) @@ -42,7 +43,8 @@ track_name=trackName, long_label=self.name_bed_generic, track_type='bigBed', visibility='dense', priority=self.priority, - track_file=myBigBedFilePath) + track_file=myBigBedFilePath, + track_color=self.track_color) # dataURL = "tracks/%s" % trackName #
--- a/BedSimpleRepeats.py Fri Sep 02 15:41:51 2016 -0400 +++ b/BedSimpleRepeats.py Sun Sep 25 11:25:38 2016 -0400 @@ -17,6 +17,7 @@ self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path self.name_bed_simple_repeats = data_bed_simple_repeats["name"] self.priority = data_bed_simple_repeats["order_index"] + self.track_color = data_bed_simple_repeats["track_color"] sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") @@ -42,7 +43,8 @@ track_name=trackName, long_label=self.name_bed_simple_repeats, track_type='bigBed 4 +', visibility='dense', priority=self.priority, - track_file=myBigBedFilePath) + track_file=myBigBedFilePath, + track_color=self.track_color) # dataURL = "tracks/%s" % trackName #
--- a/BigWig.py Fri Sep 02 15:41:51 2016 -0400 +++ b/BigWig.py Sun Sep 25 11:25:38 2016 -0400 @@ -18,6 +18,7 @@ self.input_bigwig_path = input_bigwig_path self.name_bigwig = data_bigwig["name"] self.priority = data_bigwig["order_index"] + self.track_color = data_bigwig["track_color"] #print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig ) @@ -32,7 +33,8 @@ long_label=self.name_bigwig, track_type='bigWig', visibility='full', priority=self.priority, - track_file=myBigWigFilePath) + track_file=myBigWigFilePath, + track_color=self.track_color) # dataURL = "tracks/%s" % trackName #
--- a/Datatype.py Fri Sep 02 15:41:51 2016 -0400 +++ b/Datatype.py Sun Sep 25 11:25:38 2016 -0400 @@ -76,7 +76,7 @@ file_path=None, track_name=None, long_label=None, thick_draw_item='off', short_label=None, track_type=None, visibility=None, priority=None, - track_file=None): + track_file=None, track_color='#000000'): # TODO: Remove the hardcoded "tracks" by the value used as variable from myTrackFolderPath data_url = "tracks/%s" % file_path @@ -89,6 +89,12 @@ long_label = long_label.replace("_", " ") short_label = short_label.replace("_", " ") + #TODO: Check if rgb or hexa + # Convert hexa to rgb array + hexa_without_sharp = track_color.lstrip('#') + rgb_array = [int(hexa_without_sharp[i:i+2], 16) for i in (0, 2, 4)] + rgb_ucsc = ','.join(map(str, rgb_array)) + track_db = TrackDb( trackName=track_name, longLabel=long_label, @@ -98,6 +104,7 @@ visibility=visibility, thickDrawItem=thick_draw_item, priority=priority, + track_color=rgb_ucsc ) # Return the Bam Track Object
--- a/Gff3.py Fri Sep 02 15:41:51 2016 -0400 +++ b/Gff3.py Sun Sep 25 11:25:38 2016 -0400 @@ -19,6 +19,7 @@ self.input_Gff3_false_path = input_Gff3_false_path self.name_gff3 = data_gff3["name"] self.priority = data_gff3["order_index"] + self.track_color = data_gff3["track_color"] # TODO: See if we need these temporary files as part of the generated files unsorted_genePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred") @@ -57,6 +58,7 @@ long_label=self.name_gff3, track_type='bigGenePred', visibility='dense', priority=self.priority, - track_file=myBigBedFilePath) + track_file=myBigBedFilePath, + track_color=self.track_color) print("- Gff3 %s created" % self.name_gff3)
--- a/Gtf.py Fri Sep 02 15:41:51 2016 -0400 +++ b/Gtf.py Sun Sep 25 11:25:38 2016 -0400 @@ -20,6 +20,7 @@ self.input_gtf_false_path = input_gtf_false_path self.name_gtf = data_gtf["name"] self.priority = data_gtf["order_index"] + self.track_color = data_gtf["track_color"] #print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf) @@ -59,6 +60,7 @@ track_name=trackName, long_label=self.name_gtf, track_type='bigGenePred', visibility='dense', priority=self.priority, - track_file=myBigBedFilePath) + track_file=myBigBedFilePath, + track_color=self.track_color) print("- Gtf %s created" % self.name_gtf)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Psl.py Sun Sep 25 11:25:38 2016 -0400 @@ -0,0 +1,57 @@ +import logging +import os +import tempfile + +# Internal dependencies +from Datatype import Datatype +from util import subtools + + +class Psl(Datatype): + def __init__(self, input_psl_path, data_psl): + super(Psl, self).__init__() + + self.track = None + + self.input_psl_path = input_psl_path + self.name_psl = data_psl["name"] + self.priority = data_psl["order_index"] + self.track_color = data_psl["track_color"] + + # Temporary files + unsorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='.psl') + sorted_bed_formatted_psl_file = tempfile.NamedTemporaryFile(suffix='psl') + + # Get the bed12+12 with pslToBigPsl + subtools.pslToBigPsl(input_psl_path, unsorted_bed_formatted_psl_file.name) + + # Sort the formatted psl into sorted_bed_formatted_psl_file + subtools.sort(unsorted_bed_formatted_psl_file.name, sorted_bed_formatted_psl_file.name) + + # Get the binary indexed bigPsl with bedToBigBed + trackName = "".join((self.name_psl, ".bb")) + + auto_sql_option = os.path.join(self.tool_directory, 'bigPsl.as') + + my_big_psl_file_path = os.path.join(self.myTrackFolderPath, trackName) + + logging.debug("Hello") + + with open(my_big_psl_file_path, 'w') as big_psl_file: + subtools.bedToBigBed(sorted_bed_formatted_psl_file.name, + self.chromSizesFile.name, + big_psl_file.name, + autoSql=auto_sql_option, + typeOption='bed12+12', + tab=True) + + # Create the Track Object + self.createTrack(file_path=trackName, + track_name=trackName, + long_label=self.name_psl, + track_type='bigPsl', visibility='dense', + priority=self.priority, + track_file=my_big_psl_file_path, + track_color=self.track_color) + + print("- BigPsl %s created" % self.name_psl) \ No newline at end of file
--- a/TrackDb.py Fri Sep 02 15:41:51 2016 -0400 +++ b/TrackDb.py Sun Sep 25 11:25:38 2016 -0400 @@ -4,7 +4,7 @@ """docstring for TrackDb""" def __init__(self, trackName="", longLabel="", shortLabel="", trackDataURL="", trackType="", visibility="", - thickDrawItem='off', priority="0"): + thickDrawItem='off', priority="0", track_color="#000000"): super(TrackDb, self).__init__() self.trackName = trackName @@ -15,3 +15,4 @@ self.visibility = visibility self.thickDrawItem = thickDrawItem self.priority = priority + self.track_color = track_color
--- a/TrackHub.py Fri Sep 02 15:41:51 2016 -0400 +++ b/TrackHub.py Sun Sep 25 11:25:38 2016 -0400 @@ -1,6 +1,7 @@ #!/usr/bin/python # -*- coding: utf8 -*- +import logging import os import tempfile import shutil @@ -88,14 +89,36 @@ # TODO: We are basically looping two times: One time with os.walk, Second time # with the template. We could improve that if the number of files begins to be really important list_relative_file_path = [ ] + + # TODO: Create classes Tree to manage this => Better readibility and maintenability + def create_tree(array_path, tree, relative_array_file_path, level=0): + cur_relative_file_path = '/'.join(relative_array_file_path[:level+1]) + if array_path[0] in tree.keys(): + create_tree(array_path[1:], tree[array_path[0]][0], + relative_array_file_path, level+1) + else: + tree[array_path[0]] = ({}, cur_relative_file_path) + # TODO: Manage also the links of the directories => No link? + # => Managed in display.txt, but could also be managed there + # If we are don't have any sub-vertices + if len(array_path) == 1: + # We create the path to it + return + else: + create_tree(array_path[1:], tree[array_path[0]][0], + relative_array_file_path, level + 1) + + walkable_tree = {} for root, dirs, files in os.walk(self.extra_files_path): + # Prepare the tree from to perform a Depth First Search for file in files: relative_directory = os.path.relpath(root, self.extra_files_path) relative_file_path = os.path.join(relative_directory, file) - list_relative_file_path.append(relative_file_path) + array_path = relative_file_path.split('/') + create_tree(array_path, walkable_tree, array_path, 0) htmlMakoRendered = mytemplate.render( - list_relative_file_path=list_relative_file_path + walkable_tree=walkable_tree ) htmlOutput.write(htmlMakoRendered)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bigPsl.as Sun Sep 25 11:25:38 2016 -0400 @@ -0,0 +1,33 @@ +table bigPsl +"bigPsl pairwise alignment" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name or ID of item, ideally both human readable and unique" + uint score; "Score (0-1000)" + char[1] strand; "+ or - for strand" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint reserved; "RGB value (use R,G,B string in input file)" + int blockCount; "Number of blocks" + int[blockCount] blockSizes; "Comma separated list of block sizes" + int[blockCount] chromStarts; "Start positions relative to chromStart" + + uint oChromStart;"Start position in other chromosome" + uint oChromEnd; "End position in other chromosome" + char[1] oStrand; "+ or - for other strand" + uint oChromSize; "Size of other chromosome." + int[blockCount] oChromStarts; "Start positions relative to oChromStart" + + lstring oSequence; "Sequence on other chrom (or edit list, or empty)" + string oCDS; "CDS in NCBI format" + + uint chromSize;"Size of target chromosome" + + uint match; "Number of bases matched." + uint misMatch; " Number of bases that don't match " + uint repMatch; " Number of bases that match but are part of repeats " + uint nCount; " Number of 'N' bases " + ) +
--- a/hubArchiveCreator.py Fri Sep 02 15:41:51 2016 -0400 +++ b/hubArchiveCreator.py Sun Sep 25 11:25:38 2016 -0400 @@ -24,6 +24,7 @@ from util.Filters import TraceBackFormatter from Gff3 import Gff3 from Gtf import Gtf +from Psl import Psl from TrackHub import TrackHub # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -54,6 +55,9 @@ # Bam Management parser.add_argument('--bam', action='append', help='Bam format') + # Psl Management + parser.add_argument('--psl', action='append', help='Psl format') + # TODO: Check if the running directory can have issues if we run the tool outside parser.add_argument('-d', '--directory', help='Running tool directory, where to find the templates. Default is running directory') @@ -106,12 +110,13 @@ # These inputs are populated in the Galaxy Wrapper xml and are in this format: # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH} - array_inputs_gff3 = args.gff3 - array_inputs_bed_simple_repeats = args.bedSimpleRepeats + array_inputs_bam = args.bam array_inputs_bed_generic = args.bed + array_inputs_bed_simple_repeats = args.bedSimpleRepeats + array_inputs_bigwig = args.bigwig + array_inputs_gff3 = args.gff3 array_inputs_gtf = args.gtf - array_inputs_bam = args.bam - array_inputs_bigwig = args.bigwig + array_inputs_psl = args.psl outputFile = args.output @@ -129,12 +134,14 @@ all_datatype_dictionary = {} - for (inputs, datatype_class) in [(array_inputs_gff3, Gff3), - (array_inputs_bed_simple_repeats, BedSimpleRepeats), - (array_inputs_bed_generic, Bed), - (array_inputs_gtf, Gtf), - (array_inputs_bam, Bam), - (array_inputs_bigwig, BigWig)]: + for (inputs, datatype_class) in [ + (array_inputs_bam, Bam), + (array_inputs_bed_generic, Bed), + (array_inputs_bigwig, BigWig), + (array_inputs_bed_simple_repeats, BedSimpleRepeats), + (array_inputs_gff3, Gff3), + (array_inputs_gtf, Gtf), + (array_inputs_psl, Psl)]: if inputs: all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) @@ -187,6 +194,8 @@ for input_false_path in array_inputs: for key, data_value in inputs_data.items(): if key == input_false_path: + logging.debug("input_false_path: " + input_false_path) + logging.debug("data_value: " + str(data_value)) extensionObject = ExtensionClass(input_false_path, data_value) datatype_dictionary.update({data_value["order_index"]: extensionObject}) return datatype_dictionary
--- a/hubArchiveCreator.xml Fri Sep 02 15:41:51 2016 -0400 +++ b/hubArchiveCreator.xml Sun Sep 25 11:25:38 2016 -0400 @@ -12,6 +12,7 @@ <requirement type="package" version="332">ucsc-genepredtobiggenepred</requirement> <requirement type="package" version="332">ucsc-gff3togenepred</requirement> <requirement type="package" version="332">ucsc-gtftogenepred</requirement> + <requirement type="package" version="332">ucsc-psltobigpsl</requirement> <requirement type="package" version="332">ucsc-twobitinfo</requirement> <requirement type="package" version="1.3">samtools</requirement> <!-- ToolShed dependencies --> @@ -60,33 +61,46 @@ #end def #for $i, $f in enumerate( $format ) + #set track_color = str($f.formatChoice.track_color) + #if $f.formatChoice.format_select == "bam" --bam $f.formatChoice.BAM #set bam_index = $f.formatChoice.BAM.metadata.bam_index - #silent $prepare_json($f.formatChoice.BAM, $i, {"index": $bam_index}) + #silent $prepare_json($f.formatChoice.BAM, $i, {"index": $bam_index, + "track_color": $track_color}) #end if #if $f.formatChoice.format_select == "bed" #if $f.formatChoice.bedChoice.bed_select == "bed" --bed $f.formatChoice.bedChoice.BED - #silent $prepare_json($f.formatChoice.bedChoice.BED, $i) + #silent $prepare_json($f.formatChoice.bedChoice.BED, $i, + {"track_color": $track_color}) #end if #if $f.formatChoice.bedChoice.bed_select == "bed_simple_repeats_option" --bedSimpleRepeats $f.formatChoice.bedChoice.BED_simple_repeats - #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $i) + #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $i, + {"track_color": $track_color}) #end if #end if + #if $f.formatChoice.format_select == "psl" + --psl $f.formatChoice.PSL + #silent $prepare_json($f.formatChoice.PSL, $i, + {"track_color": $track_color}) + #end if #if $f.formatChoice.format_select == "bigwig" --bigwig $f.formatChoice.BIGWIG - #silent $prepare_json($f.formatChoice.BIGWIG, $i) + #silent $prepare_json($f.formatChoice.BIGWIG, $i, + {"track_color": $track_color}) #end if #if $f.formatChoice.format_select == "gff3" --gff3 $f.formatChoice.GFF3 - #silent $prepare_json($f.formatChoice.GFF3, $i) + #silent $prepare_json($f.formatChoice.GFF3, $i, + {"track_color": $track_color}) #end if #if $f.formatChoice.format_select == "gtf" ## Add also GTF from Agustus? See https://github.com/ENCODE-DCC/kentUtils/issues/8 --gtf $f.formatChoice.GTF - #silent $prepare_json($f.formatChoice.GTF, $i) + #silent $prepare_json($f.formatChoice.GTF, $i, + {"track_color": $track_color}) #end if #end for @@ -130,6 +144,7 @@ <param name="format_select" type="select" label="Format"> <option value="bam" selected="true">BAM</option> <option value="bed">BED</option> + <option value="psl">PSL</option> <option value="bigwig">BIGWIG</option> <option value="gff3">GFF3</option> <option value="gtf">GTF</option> @@ -142,30 +157,14 @@ type="data" label="BAM File" /> - </when> - <when value="bigwig"> - <param - format="bigwig" - name="BIGWIG" - type="data" - label="BIGWIG File" - /> - </when> - <when value="gff3"> - <param - format="gff3" - name="GFF3" - type="data" - label="GFF3 File" - /> - </when> - <when value="gtf"> - <param - format="gtf" - name="GTF" - type="data" - label="GTF File" - /> + <!-- TODO: Find a solution to avoid repetition and to generate a new color depending on the others --> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> </when> <when value="bed"> <conditional name="bedChoice"> @@ -180,6 +179,7 @@ type="data" label="Generic Bed File Choice" /> + </when> <when value="bed_simple_repeats_option"> <param @@ -190,6 +190,73 @@ /> </when> </conditional> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> + </when> + <when value="psl"> + <param + format="psl" + name="PSL" + type="data" + label="PSL File" + /> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> + </when> + <when value="bigwig"> + <param + format="bigwig" + name="BIGWIG" + type="data" + label="BIGWIG File" + /> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> + </when> + <when value="gff3"> + <param + format="gff3" + name="GFF3" + type="data" + label="GFF3 File" + /> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> + </when> + <when value="gtf"> + <param + format="gtf" + name="GTF" + type="data" + label="GTF File" + /> + <param name="track_color" type="color" label="Track color" value="#000000"> + <sanitizer> + <valid initial="string.letters,string.digits"> + <add value="#"/> + </valid> + </sanitizer> + </param> </when> </conditional> </repeat>
--- a/templates/display.txt Fri Sep 02 15:41:51 2016 -0400 +++ b/templates/display.txt Sun Sep 25 11:25:38 2016 -0400 @@ -5,11 +5,48 @@ The following has been generated by Hub Archive Creator: </p> <ul> - % for relative_file_path in list_relative_file_path: - <li> - <a href="${relative_file_path}">${relative_file_path}</a> - </li> - % endfor + ${print_tree(walkable_tree)} </ul> </body> -</html> \ No newline at end of file +</html> + +<%def name="print_tree(tree)"> + % if len(tree) == 0: + ## We do nothing, we are called by a leaf + ## If we are there, this is not normal though + ## TODO: Manage the error + return + % else: + % for vertex in tree: + % if len(tree[vertex][0]) > 0: + <li> + ${vertex} + </li> + <ul> + ${print_tree(tree[vertex][0])} + </ul> + % else: + <li> + <a href="${tree[vertex][1]}">${vertex}</a> + </li> + % endif + % endfor + % endif +</%def> + +<%doc> + def recurse_print_tree(tree, level): + if len(tree) == 0: + return + + for vertex in tree: + composite_name = vertex + bullet_point = '<li><a href="{0}>{0}</a></li>'.format(composite_name) + rval.append(bullet_point) + # Parent, so need to create a sub <ul> + if len(tree[vertex]) > 0: + rval.append('<ul>') + print_tree(tree[vertex], level+1) + rval.append('</ul>') + +</%doc> \ No newline at end of file
--- a/templates/trackDb/layout.txt Fri Sep 02 15:41:51 2016 -0400 +++ b/templates/trackDb/layout.txt Sun Sep 25 11:25:38 2016 -0400 @@ -8,5 +8,6 @@ visibility ${trackDb.visibility} thickDrawItem ${trackDb.thickDrawItem} priority ${trackDb.priority} + color ${trackDb.track_color} % endfor
--- a/todo.md Fri Sep 02 15:41:51 2016 -0400 +++ b/todo.md Sun Sep 25 11:25:38 2016 -0400 @@ -56,6 +56,7 @@ - [ ] Add a debug mode to have more outputs - [ ] Improve the standard output of HAC - [ ] Find why a $ (newline) is added when installing dependencies from tool_dependencies.xml +- [ ] Sort the order of the HTML directories/files ### DONE
--- a/trackHub/tracks_partial.py Fri Sep 02 15:41:51 2016 -0400 +++ b/trackHub/tracks_partial.py Sun Sep 25 11:25:38 2016 -0400 @@ -31,11 +31,42 @@ '<html><head><title>Files for Composite Dataset (%s)</title></head><p/>\ This composite dataset is composed of the following files:<p/><ul>' % ( self.file_ext)] - for composite_name, composite_file in self.get_composite_files(dataset=dataset).iteritems(): - opt_text = '' - if composite_file.optional: - opt_text = ' (optional)' - rval.append('<li><a href="%s">%s</a>%s' % (composite_name, composite_name, opt_text)) + + def create_tree(path, tree): + if path[0] in tree: + create_tree(path[1:], tree[path[0]]) + else: + tree[path[0]] = {} + if len(path) == 1: + return + else: + create_tree(path[1:], tree[path[0]]) + + def print_tree(tree, level): + if len(tree) == 0: + return + + for vertex in tree: + composite_name = vertex + bullet_point = '<li><a href="{0}>{0}</a></li>'.format(composite_name) + rval.append(bullet_point) + # Parent, so need to create a sub <ul> + if len(tree[vertex]) > 0: + rval.append('<ul>') + print_tree(tree[vertex], level+1) + rval.append('</ul>') + + walkable_tree = {} + + for composite_name_full_path, composite_file in self.get_composite_files(dataset=dataset).iteritems(): + paths = composite_name_full_path.split('/') + # Prepare the tree from to perform a Depth First Search + create_tree(paths, walkable_tree) + + # Perform a Depth First Search to print all the directory and files properly + print_tree(walkable_tree, 0) + + # rval.append('<li><a href="%s">%s</a>%s' % (composite_name, composite_name, opt_text)) rval.append('</ul></html>') return "\n".join(rval)
--- a/util/subtools.py Fri Sep 02 15:41:51 2016 -0400 +++ b/util/subtools.py Sun Sep 25 11:25:38 2016 -0400 @@ -28,10 +28,9 @@ It maps the signature of subprocess.check_call: See https://docs.python.org/2/library/subprocess.html#subprocess.check_call """ - stdin = kwargs.get('stdin') - stdout = kwargs.get('stdout') - stderr = kwargs.get('stderr') - shell = kwargs.get('shell') + stdout = kwargs.get('stdout', subprocess.PIPE) + stderr = kwargs.get('stderr', subprocess.PIPE) + shell = kwargs.get('shell', False) cmd = array_call[0] @@ -45,15 +44,26 @@ # TODO: Use universal_newlines option from Popen? try: - p = subprocess.Popen(array_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell) + p = subprocess.Popen(array_call, stdout=stdout, stderr=stderr, shell=shell) + + # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate + output, error = p.communicate() - logging.debug("\t{0}".format(output)) + if stdout == subprocess.PIPE: + logging.debug("\t{0}".format(output)) + else: + logging.debug("\tOutput in file {0}".format(stdout.name)) # If we detect an error from the subprocess, then we raise an exception # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process # TODO: The responsability of returning a sys.exit() should not be there, but up in the app. if p.returncode: - raise PopenError(cmd, error, p.returncode) + if stderr == subprocess.PIPE: + raise PopenError(cmd, error, p.returncode) + else: + # TODO: To Handle properly with a design behind, if we received a option as a file for the error + raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"\ + .format(cmd, stderr.name, p.returncode)) except OSError as e: message = "The subprocess {0} has encountered an OSError: {1}".format(cmd, e.strerror) @@ -75,7 +85,6 @@ sys.exit(-1) return p - def twoBitInfo(two_bit_file_name, two_bit_info_file): """ Call twoBitInfo and write the result into twoBit_info_file @@ -87,7 +96,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def faToTwoBit(fasta_file_name, twoBitFile): """ This function call faToTwoBit UCSC tool, and return the twoBitFile @@ -101,7 +109,6 @@ return twoBitFile - def gtfToGenePred(input_gtf_file_name, gene_pred_file_name): """ Call gtfToGenePred and write the result into gene_pred_file_name @@ -113,7 +120,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name): """ Call gff3ToGenePred and write the result into gene_pred_file_name @@ -149,7 +155,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def sort(unsorted_bed_file_name, sorted_bed_file_name): """ Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name @@ -161,7 +166,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name): """ Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name @@ -173,7 +177,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, typeOption=None, autoSql=None, tab=False): """ @@ -206,7 +209,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def sortBam(input_bam_file_name, output_sorted_bam_name): """ Call samtools on input_bam_file_name and output the result in output_sorted_bam_name @@ -218,7 +220,6 @@ p = _handleExceptionAndCheckCall(array_call) return p - def createBamIndex(input_sorted_bam_file_name, output_name_index_name): """ Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name @@ -229,3 +230,16 @@ array_call = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name] p = _handleExceptionAndCheckCall(array_call) return p + +def pslToBigPsl(input_psl_file_name, output_bed12_file_name): + """ + Call `pslToBigPsl` on input_psl_file_name and output the result in output_bed12_file_name + :param input_psl_file_name: Name of the psl input file + :param output_bed12_file_name: Name of the output file where to store the result of the cmd + :return: + """ + # The command to send + array_call = ['pslToBigPsl', input_psl_file_name, output_bed12_file_name] + + p = _handleExceptionAndCheckCall(array_call) + return p