Mercurial > repos > rmarenco > hubarchivecreator
changeset 11:d05236b15f81 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3760d0c8353b924ecf994131a5c2eb381aa81fb2
author | rmarenco |
---|---|
date | Wed, 27 Jul 2016 10:10:49 -0400 |
parents | acc233161f50 |
children | 747475757cb0 |
files | Bam.py Bam.pyc Bed.py Bed.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc TrackHub.py TrackHub.pyc hubArchiveCreator.py hubArchiveCreator.xml util/Fasta.pyc util/subtools.pyc |
diffstat | 20 files changed, 222 insertions(+), 143 deletions(-) [+] |
line wrap: on
line diff
--- a/Bam.py Thu Jul 21 05:58:51 2016 -0400 +++ b/Bam.py Wed Jul 27 10:10:49 2016 -0400 @@ -24,11 +24,15 @@ self.data_bam = data_bam # TODO: Check if it already contains the .bam extension / Do a function in Datatype which check the extension - self.name_bam = self.data_bam["name"] + ".bam" + if ".bam" not in self.data_bam["name"]: + self.name_bam = self.data_bam["name"] + ".bam" + else: + self.name_bam = self.data_bam["name"] + self.priority = self.data_bam["order_index"] self.index_bam = self.data_bam["index"] - print "Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam) + #print "Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam) # First: Add the bam file # Second: Add the bam index file, in the same folder (https://genome.ucsc.edu/goldenpath/help/bam.html) @@ -42,23 +46,29 @@ shutil.copyfile(self.index_bam, bam_index_file_path) # Create the Track Object - dataURL = "tracks/%s" % self.name_bam + self.createTrack(file_path=self.name_bam, + track_name=self.name_bam, + long_label=self.name_bam, track_type='bam', visibility='pack', priority=self.priority, + track_file=bam_index_file_path) + # + # dataURL = "tracks/%s" % self.name_bam + # + # trackDb = TrackDb( + # trackName=self.name_bam, + # longLabel=self.name_bam, + # shortLabel=self.getShortName( self.name_bam ), + # trackDataURL=dataURL, + # trackType='bam', + # visibility='pack', + # priority=self.priority, + # ) + # + # # Return the Bam Track Object + # self.track = Track( + # trackFile=bam_index_file_path, + # trackDb=trackDb, + # ) - trackDb = TrackDb( - trackName=self.name_bam, - longLabel=self.name_bam, - shortLabel=self.getShortName( self.name_bam ), - trackDataURL=dataURL, - trackType='bam', - visibility='pack', - priority=self.priority, - ) - - # Return the Bam Track Object - self.track = Track( - trackFile=bam_index_file_path, - trackDb=trackDb, - ) - - print("- %s created in %s" % (self.name_bam, bam_file_path)) - print("- %s created in %s" % (self.index_bam, bam_index_file_path)) + print("- Bam %s created" % self.name_bam) + #print("- %s created in %s" % (self.name_bam, bam_file_path)) + #print("- %s created in %s" % (self.index_bam, bam_index_file_path))
--- a/Bed.py Thu Jul 21 05:58:51 2016 -0400 +++ b/Bed.py Wed Jul 27 10:10:49 2016 -0400 @@ -36,23 +36,30 @@ subtools.bedToBigBed(self.sortedBedFile.name, self.chromSizesFile.name, self.bigBedFile.name) # Create the Track Object - dataURL = "tracks/%s" % trackName + self.createTrack(file_path=trackName, + track_name=trackName, + long_label=self.name_bed_generic, track_type='bigBed', visibility='dense', + priority=self.priority, + track_file=myBigBedFilePath) - trackDb = TrackDb( - trackName=trackName, - longLabel=self.name_bed_generic, - shortLabel=self.getShortName(self.name_bed_generic), - trackDataURL=dataURL, - trackType='bigBed', - visibility='dense', - thickDrawItem='on', - priority=self.priority, - ) + # dataURL = "tracks/%s" % trackName + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_bed_generic, + # shortLabel=self.getShortName(self.name_bed_generic), + # trackDataURL=dataURL, + # trackType='bigBed', + # visibility='dense', + # thickDrawItem='on', + # priority=self.priority, + # ) + # + # # Return the BigBed track + # self.track = Track( + # trackFile=myBigBedFilePath, + # trackDb=trackDb, + # ) - # Return the BigBed track - self.track = Track( - trackFile=myBigBedFilePath, - trackDb=trackDb, - ) - - print("- %s created in %s" % (trackName, myBigBedFilePath)) + print("- Bed %s created" % self.name_bed_generic) + #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- a/BedSimpleRepeats.py Thu Jul 21 05:58:51 2016 -0400 +++ b/BedSimpleRepeats.py Wed Jul 27 10:10:49 2016 -0400 @@ -34,21 +34,28 @@ autoSql=auto_sql_option) # Create the Track Object - dataURL = "tracks/%s" % trackName + self.createTrack(file_path=trackName, + track_name=trackName, + long_label=self.name_bed_simple_repeats, track_type='bigBed 4 +', visibility='dense', + priority=self.priority, + track_file=myBigBedFilePath) - trackDb = TrackDb( - trackName=trackName, - longLabel=self.name_bed_simple_repeats, - shortLabel=self.getShortName( self.name_bed_simple_repeats ), - trackDataURL=dataURL, - trackType='bigBed 4 +', - visibility='dense', - priority=self.priority, - ) + # dataURL = "tracks/%s" % trackName + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_bed_simple_repeats, + # shortLabel=self.getShortName( self.name_bed_simple_repeats ), + # trackDataURL=dataURL, + # trackType='bigBed 4 +', + # visibility='dense', + # priority=self.priority, + # ) + # + # self.track = Track( + # trackFile=myBigBedFilePath, + # trackDb=trackDb, + # ) - self.track = Track( - trackFile=myBigBedFilePath, - trackDb=trackDb, - ) - - print("- %s created in %s" % (trackName, myBigBedFilePath)) + print("- Bed simple repeats %s created" % self.name_bed_simple_repeats) + #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- a/BigWig.py Thu Jul 21 05:58:51 2016 -0400 +++ b/BigWig.py Wed Jul 27 10:10:49 2016 -0400 @@ -19,7 +19,7 @@ self.name_bigwig = data_bigwig["name"] self.priority = data_bigwig["order_index"] - print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig ) + #print "Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig ) trackName = "".join( ( self.name_bigwig, ".bigwig" ) ) @@ -27,22 +27,30 @@ shutil.copy(self.input_bigwig_path, myBigWigFilePath) # Create the Track Object - dataURL = "tracks/%s" % trackName + self.createTrack(file_path=trackName, + track_name=trackName, + long_label=self.name_bigwig, track_type='bigwig', visibility='full', + priority=self.priority, + track_file=myBigWigFilePath) - # Return the BigBed track - trackDb = TrackDb( - trackName=trackName, - longLabel=self.name_bigwig, - shortLabel=self.getShortName( self.name_bigwig ), - trackDataURL=dataURL, - trackType='bigWig', - visibility='full', - priority=self.priority, - ) + # dataURL = "tracks/%s" % trackName + # + # # Return the BigBed track + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_bigwig, + # shortLabel=self.getShortName( self.name_bigwig ), + # trackDataURL=dataURL, + # trackType='bigWig', + # visibility='full', + # priority=self.priority, + # ) + # + # self.track = Track( + # trackFile=myBigWigFilePath, + # trackDb=trackDb, + # ) - self.track = Track( - trackFile=myBigWigFilePath, - trackDb=trackDb, - ) - - print("- %s created in %s" % (trackName, myBigWigFilePath)) + print("- BigWig %s created" % self.name_bigwig) + #print("- %s created in %s" % (trackName, myBigWigFilePath))
--- a/Datatype.py Thu Jul 21 05:58:51 2016 -0400 +++ b/Datatype.py Wed Jul 27 10:10:49 2016 -0400 @@ -9,6 +9,8 @@ import tempfile from util import subtools +from Track import Track +from TrackDb import TrackDb class Datatype(object): @@ -25,6 +27,8 @@ twoBitFile = None chromSizesFile = None + track = None + def __init__(self): not_init_message = "The {0} is not initialized." \ @@ -37,6 +41,7 @@ raise TypeError(not_init_message.format('tool directory')) + @staticmethod def pre_init(reference_genome, two_bit_path, chrom_sizes_file, extra_files_path, tool_directory, specie_folder, tracks_folder): @@ -66,3 +71,39 @@ short_label_slice = slice(0, 15) return name_to_shortify[short_label_slice] + + # TODO: Better handle parameters, use heritance mecanism + # TODO: Use default parameters for some, like visibility + def createTrack(self, + file_path=None, + track_name=None, long_label=None, thick_draw_item='off', + short_label=None, track_type=None, visibility=None, priority=None, + track_file=None): + + # TODO: Remove the hardcoded "tracks" by the value used as variable from myTrackFolderPath + data_url = "tracks/%s" % file_path + + if not short_label: + short_label = self.getShortName(long_label) + + # Replace '_' by ' ', to invert the sanitization mecanism + # TODO: Find a better way to manage the sanitization of file path + long_label = long_label.replace("_", " ") + short_label = short_label.replace("_", " ") + + track_db = TrackDb( + trackName=track_name, + longLabel=long_label, + shortLabel=short_label, + trackDataURL=data_url, + trackType=track_type, + visibility=visibility, + thickDrawItem=thick_draw_item, + priority=priority, + ) + + # Return the Bam Track Object + self.track = Track( + trackFile=track_file, + trackDb=track_db, + )
--- a/Gff3.py Thu Jul 21 05:58:51 2016 -0400 +++ b/Gff3.py Wed Jul 27 10:10:49 2016 -0400 @@ -47,21 +47,27 @@ subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object - dataURL = "tracks/%s" % trackName + self.createTrack(file_path=trackName, + track_name=trackName, + long_label=self.name_gff3, track_type='bigBed 12 +', visibility='dense', priority=self.priority, + track_file=myBigBedFilePath) - trackDb = TrackDb( - trackName=trackName, - longLabel=self.name_gff3, - shortLabel=self.getShortName( self.name_gff3 ), - trackDataURL=dataURL, - trackType='bigBed 12 +', - visibility='dense', - priority=self.priority, - ) + # dataURL = "tracks/%s" % trackName + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_gff3, + # shortLabel=self.getShortName( self.name_gff3 ), + # trackDataURL=dataURL, + # trackType='bigBed 12 +', + # visibility='dense', + # priority=self.priority, + # ) + # + # self.track = Track( + # trackFile=myBigBedFilePath, + # trackDb=trackDb, + # ) - self.track = Track( - trackFile=myBigBedFilePath, - trackDb=trackDb, - ) - - print("- %s created in %s" % (trackName, myBigBedFilePath)) + print("- Gff3 %s created" % self.name_gff3) + #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- a/Gtf.py Thu Jul 21 05:58:51 2016 -0400 +++ b/Gtf.py Wed Jul 27 10:10:49 2016 -0400 @@ -21,7 +21,7 @@ self.name_gtf = data_gtf["name"] self.priority = data_gtf["order_index"] - print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf) + #print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf) # TODO: See if we need these temporary files as part of the generated files genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred") @@ -46,20 +46,26 @@ subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object - dataURL = "tracks/%s" % trackName + self.createTrack(file_path=trackName, + track_name=trackName, + long_label=self.name_gtf, track_type='bigBed 12 +', visibility='dense', priority=self.priority, + track_file=myBigBedFilePath) + # + # dataURL = "tracks/%s" % trackName + # + # trackDb = TrackDb( + # trackName=trackName, + # longLabel=self.name_gtf, + # shortLabel=self.getShortName( self.name_gtf ), + # trackDataURL=dataURL, + # trackType='bigBed 12 +', + # visibility='dense', + # priority=self.priority, + # ) + # self.track = Track( + # trackFile=myBigBedFilePath, + # trackDb=trackDb, + # ) - trackDb = TrackDb( - trackName=trackName, - longLabel=self.name_gtf, - shortLabel=self.getShortName( self.name_gtf ), - trackDataURL=dataURL, - trackType='bigBed 12 +', - visibility='dense', - priority=self.priority, - ) - self.track = Track( - trackFile=myBigBedFilePath, - trackDb=trackDb, - ) - - print("- %s created in %s" % (trackName, myBigBedFilePath)) + print("- Gtf %s created" % self.name_gtf) + #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- a/TrackHub.py Thu Jul 21 05:58:51 2016 -0400 +++ b/TrackHub.py Wed Jul 27 10:10:49 2016 -0400 @@ -28,6 +28,7 @@ self.reference_genome = inputFastaFile # TODO: Add the specie name self.genome_name = inputFastaFile.assembly_id + self.specie_html = self.genome_name + '.html' self.default_pos = None self.user_email = user_email @@ -148,8 +149,7 @@ self.__fillHubTxt__(hubTxtFilePath) # Add the hub.html file - # TODO: Change the name and get it depending on the specie - hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') + hubHtmlFilePath = os.path.join(myHubPath, self.specie_html) self.__fillHubHtmlFile__(hubHtmlFilePath) @@ -208,7 +208,7 @@ longLabel=self.genome_name, genomesFile='genomes.txt', email=self.user_email, - descriptionUrl='dbia.html' + descriptionUrl=self.specie_html ) genomesTxtFile.write(htmlMakoRendered)
--- a/hubArchiveCreator.py Thu Jul 21 05:58:51 2016 -0400 +++ b/hubArchiveCreator.py Wed Jul 27 10:10:49 2016 -0400 @@ -82,13 +82,16 @@ input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) genome_name = sanitize_name_input(args.genome_name) - reference_genome = Fasta(array_inputs_reference_genome["false_path"], + reference_genome = Fasta(input_fasta_file, input_fasta_file_name, genome_name) user_email = args.user_email - # TODO: Add array for each input because we can add multiple -b for example + filter the data associated + # TODO: Use a class to have a better management of the structure of these inputs + # These inputs are populated in the Galaxy Wrapper xml and are in this format: + # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] + # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH} array_inputs_gff3 = args.gff3 array_inputs_bed_simple_repeats = args.bedSimpleRepeats array_inputs_bed_generic = args.bed @@ -117,31 +120,14 @@ all_datatype_dictionary = {} - datatype_parameters = (inputs_data, all_datatype_dictionary) - - # Process Augustus - if array_inputs_gff3: - create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) - - # Process Bed simple repeats - if array_inputs_bed_simple_repeats: - create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters) - - # Process Bed - if array_inputs_bed_generic: - create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters) - - # Process GTF - if array_inputs_gtf: - create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters) - - # Process Bam - if array_inputs_bam: - create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters) - - # Process BigWig - if array_inputs_bigwig: - create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters) + for (inputs, datatype_class) in [(array_inputs_gff3, Gff3), + (array_inputs_bed_simple_repeats, BedSimpleRepeats), + (array_inputs_bed_generic, Bed), + (array_inputs_gtf, Gtf), + (array_inputs_bam, Bam), + (array_inputs_bigwig, BigWig)]: + if inputs: + all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) # Create Ordered Dictionary to add the tracks in the tool form order all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) @@ -155,13 +141,19 @@ # We terminate le process and so create a HTML file summarizing all the files trackHub.terminate() + print "\t" + print "--------------" + print "Well done guys! Your data are ready to be displayed in UCSC Track Hub." + sys.exit(0) + def sanitize_name_input(string_to_sanitize): return string_to_sanitize \ .replace("/", "_") \ .replace(" ", "_") + def sanitize_name_inputs(inputs_data): """ Sometimes output from Galaxy, or even just file name from user have spaces @@ -173,7 +165,7 @@ inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) -def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): +def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data): """ Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub and update the dictionary of datatype @@ -189,9 +181,8 @@ for key, data_value in inputs_data.items(): if key == input_false_path: extensionObject = ExtensionClass(input_false_path, data_value) - datatype_dictionary.update({data_value["order_index"]: extensionObject}) - all_datatype_dictionary.update(datatype_dictionary) + return datatype_dictionary if __name__ == "__main__": main(sys.argv)
--- a/hubArchiveCreator.xml Thu Jul 21 05:58:51 2016 -0400 +++ b/hubArchiveCreator.xml Wed Jul 27 10:10:49 2016 -0400 @@ -1,4 +1,4 @@ -<tool id="hubArchiveCreator" name="Hub Archive Creator" version="2.0.2"> +<tool id="hubArchiveCreator" name="Hub Archive Creator" version="2.3.0"> <description> This Galaxy tool permits to prepare your files to be ready for Assembly Hub visualization. @@ -11,9 +11,12 @@ <requirement type="package" version="0.0.1">genePredToBed</requirement> <requirement type="package" version="1.2">samtools</requirement> <!-- Conda dependencies --> + <requirement type="package" version="332">ucsc-bedtobigbed</requirement> + <requirement type="package" version="332">ucsc-fatotwobit</requirement> + <requirement type="package" version="324">ucsc-genepredtobed</requirement> <requirement type="package" version="324">ucsc-gff3togenepred</requirement> <requirement type="package" version="324">ucsc-gtftogenepred</requirement> - <requirement type="package" version="324">ucsc-genepredtobed</requirement> + <requirement type="package" version="324">ucsc-twobitinfo</requirement> <requirement type="package" version="1.3.1">samtools</requirement> </requirements> @@ -158,8 +161,8 @@ <when value="bed"> <conditional name="bedChoice"> <param name="bed_select" type="select" label="Bed Choice"> - <option value="bed" selected="true">Generic BED</option> - <option value="bed_simple_repeats_option">BED simple repeats</option> + <option value="bed" selected="true">BED Generic (bed3+)</option> + <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option> </param> <when value="bed"> <param