hubarchivecreator: hubArchiveCreator.py comparison

comparison hubArchiveCreator.py @ 10:acc233161f50 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d

author	rmarenco
date	Thu, 21 Jul 2016 05:58:51 -0400
parents	4f9847539a28
children	d05236b15f81

comparison

equal deleted inserted replaced

-:4f9847539a28
+:acc233161f50
 import collections
 import json
 import sys
 # Internal dependencies
-from TrackHub import TrackHub
-from Gff3 import Gff3
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
 from Bed import Bed
 from BigWig import BigWig
+from util.Fasta import Fasta
+from Gff3 import Gff3
 from Gtf import Gtf
+from TrackHub import TrackHub
 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
 help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
 parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')
 parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')
+parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')
 ucsc_tools_path = ''
 toolDirectory = '.'
 extra_files_path = '.'
 # Get the args passed in parameter
 args = parser.parse_args()
-input_fasta_file = args.fasta
+array_inputs_reference_genome = json.loads(args.fasta)
+# TODO: Replace these with the object Fasta
+input_fasta_file = array_inputs_reference_genome["false_path"]
+input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
+genome_name = sanitize_name_input(args.genome_name)
+reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+input_fasta_file_name, genome_name)
+user_email = args.user_email
 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
 array_inputs_gff3 = args.gff3
 array_inputs_bed_simple_repeats = args.bedSimpleRepeats
 array_inputs_bed_generic = args.bed
 array_inputs_gtf = args.gtf
 if args.directory:
 toolDirectory = args.directory
 if args.extra_files_path:
 extra_files_path = args.extra_files_path
-# TODO: Check here all the binaries / tools we need. Exception is missing
+# TODO: Check here all the binaries / tools we need. Exception if missing
 # Create the Track Hub folder
-trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)
 all_datatype_dictionary = {}
+datatype_parameters = (inputs_data, all_datatype_dictionary)
 # Process Augustus
 if array_inputs_gff3:
-create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
+create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)
-extra_files_path, all_datatype_dictionary, toolDirectory)
-# Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+# Process Bed simple repeats
 if array_inputs_bed_simple_repeats:
-create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
+create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
-extra_files_path, all_datatype_dictionary, toolDirectory)
-# Process a Bed => tBlastN or TopHat
+# Process Bed
 if array_inputs_bed_generic:
-create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
+create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
-extra_files_path, all_datatype_dictionary, toolDirectory)
-# Process a GTF => Tophat
+# Process GTF
 if array_inputs_gtf:
-create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
+create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
-extra_files_path, all_datatype_dictionary, toolDirectory)
-# Process a Bam => Tophat
+# Process Bam
 if array_inputs_bam:
-create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
+create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
-extra_files_path, all_datatype_dictionary, toolDirectory)
-# Process a BigWig => From Bam
+# Process BigWig
 if array_inputs_bigwig:
-create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
+create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
-extra_files_path, all_datatype_dictionary, toolDirectory)
 # Create Ordered Dictionary to add the tracks in the tool form order
 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
 # We terminate the process and so create an HTML file summarizing all the files
 trackHub.terminate()
 sys.exit(0)
+def sanitize_name_input(string_to_sanitize):
+return string_to_sanitize \
+.replace("/", "_") \
+.replace(" ", "_")
 def sanitize_name_inputs(inputs_data):
 """
 Sometimes outputs from Galaxy, or even just file names from the user, contain spaces.
 They can also contain the '/' character, which could break the use of os.path functions.
 :param inputs_data: dict[string, dict[string, string]]
 :return:
 """
 for key in inputs_data:
-inputs_data[key]["name"] = inputs_data[key]["name"]\
+inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
-.replace("/", "_")\
-.replace(" ", "_")
-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
-extra_files_path, all_datatype_dictionary, tool_directory):
 """
 Function which executes the creation of all the necessary files / folders for a specific Datatype, for TrackHub,
 and updates the dictionary of datatypes
 :param ExtensionClass: T <= Datatype
 :param array_inputs: list[string]
 :param inputs_data:
-:param input_fasta_file: string
-:param extra_files_path: string
-:param tool_directory; string
 """
 datatype_dictionary = {}
 # TODO: Optimize this double loop
 for input_false_path in array_inputs:
 for key, data_value in inputs_data.items():
 if key == input_false_path:
-extensionObject = ExtensionClass(input_false_path, data_value,
+extensionObject = ExtensionClass(input_false_path, data_value)
-input_fasta_file, extra_files_path, tool_directory)
 datatype_dictionary.update({data_value["order_index"]: extensionObject})
 all_datatype_dictionary.update(datatype_dictionary)
 if __name__ == "__main__":
 main(sys.argv)

Mercurial > repos > rmarenco > hubarchivecreator

comparison hubArchiveCreator.py @ 10:acc233161f50 draft