diff hubArchiveCreator.py @ 20:40469b265ddb draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3af31e043f5b82636015c18e013d2f22ce6c9077-dirty
author yating-l
date Fri, 20 Jan 2017 17:12:03 -0500
parents d786bca6a75d
children 884ee2a71680
line wrap: on
line diff
--- a/hubArchiveCreator.py	Thu Oct 13 22:49:01 2016 -0400
+++ b/hubArchiveCreator.py	Fri Jan 20 17:12:03 2017 -0500
@@ -18,6 +18,7 @@
 # Internal dependencies
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
+from BedSpliceJunctions import BedSpliceJunctions
 from Bed import Bed
 from BigWig import BigWig
 from util.Fasta import Fasta
@@ -26,6 +27,7 @@
 from Gtf import Gtf
 from Psl import Psl
 from TrackHub import TrackHub
+from bigPsl import bigPsl
 
 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
 
@@ -46,9 +48,15 @@
     # Bed4+12 (TrfBig)
     parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')
 
+    # Bed12+1 (regtools)
+    parser.add_argument('--bedSpliceJunctions', action='append', help='Bed12+1 format, using spliceJunctions.as')
+
     # Generic Bed (Blastx transformed to bed)
     parser.add_argument('--bed', action='append', help='Bed generic format')
 
+    # Bed12+12 (tblastn)
+    parser.add_argument('--bigpsl', action='append', help='bigPsl format')
+
     # BigWig Management
     parser.add_argument('--bigwig', action='append', help='BigWig format')
 
@@ -113,15 +121,18 @@
     array_inputs_bam = args.bam
     array_inputs_bed_generic = args.bed
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
+    array_inputs_bed_splice_junctions = args.bedSpliceJunctions
     array_inputs_bigwig = args.bigwig
     array_inputs_gff3 = args.gff3
     array_inputs_gtf = args.gtf
     array_inputs_psl = args.psl
+    array_inputs_bigpsl = args.bigpsl
 
     outputFile = args.output
 
     json_inputs_data = args.data_json
 
+    # TODO: Store the inputs in a dedicated class (built through json.loads' object_hook) instead of raw dicts
     inputs_data = json.loads(json_inputs_data)
     # We remove the spaces in ["name"] of inputs_data
     sanitize_name_inputs(inputs_data)
@@ -138,9 +149,11 @@
                         (array_inputs_bed_generic, Bed),
                         (array_inputs_bigwig, BigWig),
                         (array_inputs_bed_simple_repeats, BedSimpleRepeats),
+                        (array_inputs_bed_splice_junctions, BedSpliceJunctions),
                         (array_inputs_gff3, Gff3),
                         (array_inputs_gtf, Gtf),
-                        (array_inputs_psl, Psl)]:
+                        (array_inputs_psl, Psl),
+                        (array_inputs_bigpsl, bigPsl)]:
         if inputs:
             all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
 
@@ -167,17 +180,30 @@
 
 
 def sanitize_name_input(string_to_sanitize):
-        return string_to_sanitize \
+    """
+    Sanitize the string passed as parameter by replacing '/' and ' ' with '_'.
+
+    :param string_to_sanitize: the string to clean up
+    :return: the string with every '/' and ' ' replaced by '_'
+
+    :Example:
+
+    >>> sanitize_name_input('this/is an//example')
+    'this_is_an__example'
+    """
+    return string_to_sanitize \
             .replace("/", "_") \
             .replace(" ", "_")
 
 
 def sanitize_name_inputs(inputs_data):
     """
-    Sometimes output from Galaxy, or even just file name from user have spaces
-    Also, it can contain '/' character and could break the use of os.path function
+    Sanitize the value of the "name" key of each entry of the dictionary passed as parameter.
+
+    Output names from Galaxy, or file names supplied by the user, sometimes contain spaces.
+    They can also contain the '/' character, which could break the use of os.path functions.
+
     :param inputs_data: dict[string, dict[string, string]]
-    :return:
     """
     for key in inputs_data:
         inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
@@ -187,9 +213,14 @@
     """
     Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
     and update the dictionary of datatype
-    :param ExtensionClass: T <= Datatype
-    :param array_inputs: list[string]
+
+    :param ExtensionClass:
+    :param array_inputs:
     :param inputs_data:
+    :type ExtensionClass: Datatype
+    :type array_inputs: list[string]
+    :type inputs_data: dict
+    :rtype: dict
     """
 
     datatype_dictionary = {}
@@ -235,8 +266,9 @@
             in STDERR
         - And further access to debug if needed
             in .log
-    :return:
+
     """
+
     if not log_stdout:
         raise Exception("No log_stdout given. Stopping the application")
 
@@ -255,7 +287,7 @@
         - Dev needs to have WARN, ERROR and CRITICAL but well formatted / without traceback, in stdout
         - Still access to full, brute and traceback in stderr for errors
         - And further access to debug if needed
-    :return:
+
     """
     if not log_stdout:
         raise Exception("No log_stdout given. Stopping the application")
@@ -273,7 +305,6 @@
 def configure_logger_stderr():
     """
     Configure what should be logged in stderr
-    :return:
     """
     log_error = logging.StreamHandler(sys.stderr)
     log_error.setLevel(logging.ERROR)