Mercurial > repos > rmarenco > hubarchivecreator
diff hubArchiveCreator.py @ 10:acc233161f50 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
| author | rmarenco |
|---|---|
| date | Thu, 21 Jul 2016 05:58:51 -0400 |
| parents | 4f9847539a28 |
| children | d05236b15f81 |
line wrap: on
line diff
```diff
--- a/hubArchiveCreator.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/hubArchiveCreator.py	Thu Jul 21 05:58:51 2016 -0400
@@ -14,13 +14,14 @@
 import sys

 # Internal dependencies
-from TrackHub import TrackHub
-from Gff3 import Gff3
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
 from Bed import Bed
 from BigWig import BigWig
+from util.Fasta import Fasta
+from Gff3 import Gff3
 from Gtf import Gtf
+from TrackHub import TrackHub

 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
@@ -62,6 +63,10 @@
     parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

+    parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')
+
+    parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')
+
     ucsc_tools_path = ''
     toolDirectory = '.'
@@ -70,11 +75,20 @@
     # Get the args passed in parameter
     args = parser.parse_args()

-    input_fasta_file = args.fasta
+    array_inputs_reference_genome = json.loads(args.fasta)
+
+    # TODO: Replace these with the object Fasta
+    input_fasta_file = array_inputs_reference_genome["false_path"]
+    input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
+    genome_name = sanitize_name_input(args.genome_name)
+
+    reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+                             input_fasta_file_name, genome_name)
+
+    user_email = args.user_email

     # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
-
     array_inputs_gff3 = args.gff3
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
     array_inputs_bed_generic = args.bed
@@ -96,42 +110,38 @@
     if args.extra_files_path:
         extra_files_path = args.extra_files_path

-    # TODO: Check here all the binaries / tools we need. Exception is missing
+    # TODO: Check here all the binaries / tools we need. Exception if missing

     # Create the Track Hub folder
-    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+    trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)

     all_datatype_dictionary = {}

+    datatype_parameters = (inputs_data, all_datatype_dictionary)
+
     # Process Augustus
     if array_inputs_gff3:
-        create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)

-    # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+    # Process Bed simple repeats
     if array_inputs_bed_simple_repeats:
-        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)

-    # Process a Bed => tBlastN or TopHat
+    # Process Bed
     if array_inputs_bed_generic:
-        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)

-    # Process a GTF => Tophat
+    # Process GTF
     if array_inputs_gtf:
-        create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)

-    # Process a Bam => Tophat
+    # Process Bam
     if array_inputs_bam:
-        create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)

-    # Process a BigWig => From Bam
+    # Process BigWig
     if array_inputs_bigwig:
-        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)

     # Create Ordered Dictionary to add the tracks in the tool form order
     all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
@@ -147,6 +157,10 @@
     sys.exit(0)


+def sanitize_name_input(string_to_sanitize):
+    return string_to_sanitize \
+        .replace("/", "_") \
+        .replace(" ", "_")

 def sanitize_name_inputs(inputs_data):
     """
@@ -156,22 +170,16 @@
     :return:
     """
     for key in inputs_data:
-        inputs_data[key]["name"] = inputs_data[key]["name"]\
-            .replace("/", "_")\
-            .replace(" ", "_")
+        inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])


-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, tool_directory):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
     """
     Function which executes the creation all the necessary files / folders for a special Datatype,
     for TrackHub and update the dictionary of datatype
     :param ExtensionClass: T <= Datatype
     :param array_inputs: list[string]
     :param inputs_data:
-    :param input_fasta_file: string
-    :param extra_files_path: string
-    :param tool_directory; string
     """
     datatype_dictionary = {}
@@ -180,8 +188,8 @@
     for input_false_path in array_inputs:
         for key, data_value in inputs_data.items():
             if key == input_false_path:
-                extensionObject = ExtensionClass(input_false_path, data_value,
-                                                 input_fasta_file, extra_files_path, tool_directory)
+                extensionObject = ExtensionClass(input_false_path, data_value)
+
                 datatype_dictionary.update({data_value["order_index"]: extensionObject})
     all_datatype_dictionary.update(datatype_dictionary)
```