diff hubArchiveCreator.py @ 10:acc233161f50 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author rmarenco
date Thu, 21 Jul 2016 05:58:51 -0400
parents 4f9847539a28
children d05236b15f81
line wrap: on
line diff
--- a/hubArchiveCreator.py	Wed Jul 20 12:29:08 2016 -0400
+++ b/hubArchiveCreator.py	Thu Jul 21 05:58:51 2016 -0400
@@ -14,13 +14,14 @@
 import sys
 
 # Internal dependencies
-from TrackHub import TrackHub
-from Gff3 import Gff3
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
 from Bed import Bed
 from BigWig import BigWig
+from util.Fasta import Fasta
+from Gff3 import Gff3
 from Gtf import Gtf
+from TrackHub import TrackHub
 
 
 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
@@ -62,6 +63,10 @@
 
     parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
 
+    parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')
+
+    parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')
+
     ucsc_tools_path = ''
 
     toolDirectory = '.'
@@ -70,11 +75,20 @@
     # Get the args passed in parameter
     args = parser.parse_args()
 
-    input_fasta_file = args.fasta
+    array_inputs_reference_genome = json.loads(args.fasta)
+
+    # TODO: Replace these standalone variables with the Fasta object (reference_genome) built below
+    input_fasta_file = array_inputs_reference_genome["false_path"]
+    input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
+    genome_name = sanitize_name_input(args.genome_name)
+
+    reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+                             input_fasta_file_name, genome_name)
+
+    user_email = args.user_email
 
     # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
 
-
     array_inputs_gff3 = args.gff3
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
     array_inputs_bed_generic = args.bed
@@ -96,42 +110,38 @@
     if args.extra_files_path:
         extra_files_path = args.extra_files_path
 
-    # TODO: Check here all the binaries / tools we need. Exception is missing
+    # TODO: Check here that all the binaries / tools we need are available; raise an exception if any is missing
 
     # Create the Track Hub folder
-    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+    trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)
 
     all_datatype_dictionary = {}
 
+    datatype_parameters = (inputs_data, all_datatype_dictionary)
+
     # Process Augustus
     if array_inputs_gff3:
-        create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)
 
-    # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+    # Process Bed simple repeats
     if array_inputs_bed_simple_repeats:
-        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
 
-    # Process a Bed => tBlastN or TopHat
+    # Process Bed
     if array_inputs_bed_generic:
-        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
 
-    # Process a GTF => Tophat
+    # Process GTF
     if array_inputs_gtf:
-        create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
 
-    # Process a Bam => Tophat
+    # Process Bam
     if array_inputs_bam:
-        create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
 
-    # Process a BigWig => From Bam
+    # Process BigWig
     if array_inputs_bigwig:
-        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
-                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
 
     # Create Ordered Dictionary to add the tracks in the tool form order
     all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
@@ -147,6 +157,10 @@
 
     sys.exit(0)
 
+def sanitize_name_input(string_to_sanitize):
+    """Return the given name with every "/" and " " replaced by "_"."""
+    without_slashes = string_to_sanitize.replace("/", "_")
+    return without_slashes.replace(" ", "_")
 
 def sanitize_name_inputs(inputs_data):
     """
@@ -156,22 +170,16 @@
     :return:
     """
     for key in inputs_data:
-        inputs_data[key]["name"] = inputs_data[key]["name"]\
-            .replace("/", "_")\
-            .replace(" ", "_")
+        inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
 
 
-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, tool_directory):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
     """
     Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
     and update the dictionary of datatype
     :param ExtensionClass: T <= Datatype
     :param array_inputs: list[string]
     :param inputs_data:
-    :param input_fasta_file: string
-    :param extra_files_path: string
-    :param tool_directory; string
     """
 
     datatype_dictionary = {}
@@ -180,8 +188,8 @@
     for input_false_path in array_inputs:
         for key, data_value in inputs_data.items():
             if key == input_false_path:
-                extensionObject = ExtensionClass(input_false_path, data_value,
-                                                 input_fasta_file, extra_files_path, tool_directory)
+                extensionObject = ExtensionClass(input_false_path, data_value)
+
                 datatype_dictionary.update({data_value["order_index"]: extensionObject})
     all_datatype_dictionary.update(datatype_dictionary)