hubarchivecreator: hubArchiveCreator.py comparison

comparison hubArchiveCreator.py @ 11:d05236b15f81 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3760d0c8353b924ecf994131a5c2eb381aa81fb2

author	rmarenco
date	Wed, 27 Jul 2016 10:10:49 -0400
parents	acc233161f50
children	25809f699cb3

comparison

equal deleted inserted replaced

-:acc233161f50
+:d05236b15f81
 # TODO: Replace these with the object Fasta
 input_fasta_file = array_inputs_reference_genome["false_path"]
 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
 genome_name = sanitize_name_input(args.genome_name)
-reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+reference_genome = Fasta(input_fasta_file,
 input_fasta_file_name, genome_name)
 user_email = args.user_email
-# TODO: Add array for each input because we can add multiple -b for example + filter the data associated
+# TODO: Use a class to have a better management of the structure of these inputs
+# These inputs are populated in the Galaxy Wrapper xml and are in this format:
+# ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
+# EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH}
 array_inputs_gff3 = args.gff3
 array_inputs_bed_simple_repeats = args.bedSimpleRepeats
 array_inputs_bed_generic = args.bed
 array_inputs_gtf = args.gtf
 array_inputs_bam = args.bam
 # Create the Track Hub folder
 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)
 all_datatype_dictionary = {}
-datatype_parameters = (inputs_data, all_datatype_dictionary)
+for (inputs, datatype_class) in [(array_inputs_gff3, Gff3),
+(array_inputs_bed_simple_repeats, BedSimpleRepeats),
-# Process Augustus
+(array_inputs_bed_generic, Bed),
-if array_inputs_gff3:
+(array_inputs_gtf, Gtf),
-create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)
+(array_inputs_bam, Bam),
+(array_inputs_bigwig, BigWig)]:
-# Process Bed simple repeats
+if inputs:
-if array_inputs_bed_simple_repeats:
+all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
-create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
-# Process Bed
-if array_inputs_bed_generic:
-create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
-# Process GTF
-if array_inputs_gtf:
-create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
-# Process Bam
-if array_inputs_bam:
-create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
-# Process BigWig
-if array_inputs_bigwig:
-create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
 # Create Ordered Dictionary to add the tracks in the tool form order
 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
 #trackHub.createZip()
 # We terminate the process and so create an HTML file summarizing all the files
 trackHub.terminate()
+print "\t"
+print "--------------"
+print "Well done guys! Your data are ready to be displayed in UCSC Track Hub."
 sys.exit(0)
 def sanitize_name_input(string_to_sanitize):
     """
     Return a copy of the given name in which every '/' and every space
     has been replaced by an underscore.
     """
     sanitized = string_to_sanitize
     # Same replacement order as before: slashes first, then spaces
     for unwanted in ("/", " "):
         sanitized = sanitized.replace(unwanted, "_")
     return sanitized
 def sanitize_name_inputs(inputs_data):
     """
     Sanitize, in place, the "name" entry of every value stored in inputs_data.

     Names coming out of Galaxy, or typed by the user, may contain spaces or
     '/' characters, which could break the use of os.path functions.
     """
     for input_info in inputs_data.values():
         input_info["name"] = sanitize_name_input(input_info["name"])
-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data):
 """
 Function which creates all the necessary files / folders for a specific Datatype, for TrackHub,
 and updates the dictionary of datatypes
 :param ExtensionClass: T <= Datatype
 :param array_inputs: list[string]
 # TODO: Optimize this double loop
 for input_false_path in array_inputs:
 for key, data_value in inputs_data.items():
 if key == input_false_path:
 extensionObject = ExtensionClass(input_false_path, data_value)
 datatype_dictionary.update({data_value["order_index"]: extensionObject})
-all_datatype_dictionary.update(datatype_dictionary)
+return datatype_dictionary
 if __name__ == "__main__":
 main(sys.argv)

Mercurial > repos > rmarenco > hubarchivecreator

comparison hubArchiveCreator.py @ 11:d05236b15f81 draft