comparison: hubArchiveCreator.py @ 10:acc233161f50 (draft)

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author rmarenco
date Thu, 21 Jul 2016 05:58:51 -0400
parents 4f9847539a28
children d05236b15f81

--- hubArchiveCreator.py  (9:4f9847539a28)
+++ hubArchiveCreator.py  (10:acc233161f50)
@@ -12,17 +12,18 @@
 import collections
 import json
 import sys
 
 # Internal dependencies
-from TrackHub import TrackHub
-from Gff3 import Gff3
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
 from Bed import Bed
 from BigWig import BigWig
+from util.Fasta import Fasta
+from Gff3 import Gff3
 from Gtf import Gtf
+from TrackHub import TrackHub
 
 
 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
 
 
@@ -60,22 +61,35 @@
                     help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
 parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')
 
 parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
 
+parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')
+
+parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')
+
 ucsc_tools_path = ''
 
 toolDirectory = '.'
 extra_files_path = '.'
 
 # Get the args passed in parameter
 args = parser.parse_args()
 
-input_fasta_file = args.fasta
+array_inputs_reference_genome = json.loads(args.fasta)
+
+# TODO: Replace these with the object Fasta
+input_fasta_file = array_inputs_reference_genome["false_path"]
+input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
+genome_name = sanitize_name_input(args.genome_name)
+
+reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+                         input_fasta_file_name, genome_name)
+
+user_email = args.user_email
 
 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
-
 
 array_inputs_gff3 = args.gff3
 array_inputs_bed_simple_repeats = args.bedSimpleRepeats
 array_inputs_bed_generic = args.bed
 array_inputs_gtf = args.gtf
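
Aside on the new fasta handling above: --fasta now carries a small JSON payload (decoded with json.loads) rather than a bare file path, and the decoded false_path/name pair, together with the sanitized --genome_name, feeds the new Fasta object. A minimal sketch of that flow, using a hypothetical payload and a namedtuple stand-in for util.Fasta, whose real definition is not part of this changeset:

    import collections
    import json

    # Hypothetical payload; in the real tool the Galaxy wrapper builds this JSON.
    fasta_arg = json.dumps({"false_path": "/galaxy/files/dataset_42.dat",
                            "name": "Dbia 2.1 assembly"})

    def sanitize_name_input(string_to_sanitize):
        return string_to_sanitize.replace("/", "_").replace(" ", "_")

    # Stand-in for util.Fasta; only its constructor arguments are visible in the diff.
    Fasta = collections.namedtuple("Fasta", ["false_path", "name", "assembly_id"])

    payload = json.loads(fasta_arg)
    reference_genome = Fasta(payload["false_path"],
                             sanitize_name_input(payload["name"]),
                             sanitize_name_input("dbia2.1"))

    print(reference_genome)
    # Fasta(false_path='/galaxy/files/dataset_42.dat', name='Dbia_2.1_assembly', assembly_id='dbia2.1')
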
@@ -94,46 +108,42 @@
 if args.directory:
     toolDirectory = args.directory
 if args.extra_files_path:
     extra_files_path = args.extra_files_path
 
-# TODO: Check here all the binaries / tools we need. Exception is missing
+# TODO: Check here all the binaries / tools we need. Exception if missing
 
 # Create the Track Hub folder
-trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)
 
 all_datatype_dictionary = {}
 
+datatype_parameters = (inputs_data, all_datatype_dictionary)
+
 # Process Augustus
 if array_inputs_gff3:
-    create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, toolDirectory)
+    create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)
 
-# Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+# Process Bed simple repeats
 if array_inputs_bed_simple_repeats:
-    create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, toolDirectory)
+    create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
 
-# Process a Bed => tBlastN or TopHat
+# Process Bed
 if array_inputs_bed_generic:
-    create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, toolDirectory)
+    create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
 
-# Process a GTF => Tophat
+# Process GTF
 if array_inputs_gtf:
-    create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, toolDirectory)
+    create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
 
-# Process a Bam => Tophat
+# Process Bam
 if array_inputs_bam:
-    create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, toolDirectory)
+    create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
 
-# Process a BigWig => From Bam
+# Process BigWig
 if array_inputs_bigwig:
-    create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, toolDirectory)
+    create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
 
 # Create Ordered Dictionary to add the tracks in the tool form order
 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
 
 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
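
Aside on the call sites above: the six create_ordered_datatype_objects calls now share their trailing arguments through a single datatype_parameters tuple, unpacked with * at each call. A minimal sketch of the pattern, with a toy function standing in for the real one:

    def create_objects(kind, inputs, inputs_data, registry):
        # Toy body: register one entry per input, keyed by insertion order.
        for item in inputs:
            registry[len(registry)] = (kind, item, inputs_data.get(item))

    inputs_data = {"a.bed": {"name": "a"}, "b.gtf": {"name": "b"}}
    registry = {}

    # The shared trailing arguments, unpacked with * at every call site.
    datatype_parameters = (inputs_data, registry)

    create_objects("Bed", ["a.bed"], *datatype_parameters)
    create_objects("Gtf", ["b.gtf"], *datatype_parameters)

    print(registry)
    # {0: ('Bed', 'a.bed', {'name': 'a'}), 1: ('Gtf', 'b.gtf', {'name': 'b'})}
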
@@ -145,45 +155,43 @@
 # We terminate le process and so create a HTML file summarizing all the files
 trackHub.terminate()
 
 sys.exit(0)
 
+def sanitize_name_input(string_to_sanitize):
+    return string_to_sanitize \
+        .replace("/", "_") \
+        .replace(" ", "_")
 
 def sanitize_name_inputs(inputs_data):
     """
     Sometimes output from Galaxy, or even just file name from user have spaces
     Also, it can contain '/' character and could break the use of os.path function
     :param inputs_data: dict[string, dict[string, string]]
     :return:
     """
     for key in inputs_data:
-        inputs_data[key]["name"] = inputs_data[key]["name"]\
-            .replace("/", "_")\
-            .replace(" ", "_")
+        inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
 
 
-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
-                                    extra_files_path, all_datatype_dictionary, tool_directory):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
     """
     Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
     and update the dictionary of datatype
     :param ExtensionClass: T <= Datatype
     :param array_inputs: list[string]
     :param inputs_data:
-    :param input_fasta_file: string
-    :param extra_files_path: string
-    :param tool_directory; string
     """
 
     datatype_dictionary = {}
 
     # TODO: Optimize this double loop
     for input_false_path in array_inputs:
         for key, data_value in inputs_data.items():
             if key == input_false_path:
-                extensionObject = ExtensionClass(input_false_path, data_value,
-                                                 input_fasta_file, extra_files_path, tool_directory)
+                extensionObject = ExtensionClass(input_false_path, data_value)
+
                 datatype_dictionary.update({data_value["order_index"]: extensionObject})
     all_datatype_dictionary.update(datatype_dictionary)
 
 if __name__ == "__main__":
     main(sys.argv)
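
The new sanitize_name_input helper, also reused by sanitize_name_inputs, is just the two str.replace calls shown in the diff. For example:

    def sanitize_name_input(string_to_sanitize):
        return string_to_sanitize \
            .replace("/", "_") \
            .replace(" ", "_")

    print(sanitize_name_input("Drosophila biarmipes/contigs v1"))
    # Drosophila_biarmipes_contigs_v1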