comparison hubArchiveCreator.py @ 11:d05236b15f81 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3760d0c8353b924ecf994131a5c2eb381aa81fb2
author rmarenco
date Wed, 27 Jul 2016 10:10:49 -0400
parents acc233161f50
children 25809f699cb3
comparison
equal deleted inserted replaced
10:acc233161f50 11:d05236b15f81
80 # TODO: Replace these with the object Fasta 80 # TODO: Replace these with the object Fasta
81 input_fasta_file = array_inputs_reference_genome["false_path"] 81 input_fasta_file = array_inputs_reference_genome["false_path"]
82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) 82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
83 genome_name = sanitize_name_input(args.genome_name) 83 genome_name = sanitize_name_input(args.genome_name)
84 84
85 reference_genome = Fasta(array_inputs_reference_genome["false_path"], 85 reference_genome = Fasta(input_fasta_file,
86 input_fasta_file_name, genome_name) 86 input_fasta_file_name, genome_name)
87 87
88 user_email = args.user_email 88 user_email = args.user_email
89 89
90 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
91 90
91 # TODO: Use a class to have a better management of the structure of these inputs
92 # These inputs are populated in the Galaxy Wrapper xml and are in this format:
93 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
94 # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH}
92 array_inputs_gff3 = args.gff3 95 array_inputs_gff3 = args.gff3
93 array_inputs_bed_simple_repeats = args.bedSimpleRepeats 96 array_inputs_bed_simple_repeats = args.bedSimpleRepeats
94 array_inputs_bed_generic = args.bed 97 array_inputs_bed_generic = args.bed
95 array_inputs_gtf = args.gtf 98 array_inputs_gtf = args.gtf
96 array_inputs_bam = args.bam 99 array_inputs_bam = args.bam
115 # Create the Track Hub folder 118 # Create the Track Hub folder
116 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) 119 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)
117 120
118 all_datatype_dictionary = {} 121 all_datatype_dictionary = {}
119 122
120 datatype_parameters = (inputs_data, all_datatype_dictionary) 123 for (inputs, datatype_class) in [(array_inputs_gff3, Gff3),
121 124 (array_inputs_bed_simple_repeats, BedSimpleRepeats),
122 # Process Augustus 125 (array_inputs_bed_generic, Bed),
123 if array_inputs_gff3: 126 (array_inputs_gtf, Gtf),
124 create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) 127 (array_inputs_bam, Bam),
125 128 (array_inputs_bigwig, BigWig)]:
126 # Process Bed simple repeats 129 if inputs:
127 if array_inputs_bed_simple_repeats: 130 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
128 create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
129
130 # Process Bed
131 if array_inputs_bed_generic:
132 create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
133
134 # Process GTF
135 if array_inputs_gtf:
136 create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
137
138 # Process Bam
139 if array_inputs_bam:
140 create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
141
142 # Process BigWig
143 if array_inputs_bigwig:
144 create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
145 131
146 # Create Ordered Dictionary to add the tracks in the tool form order 132 # Create Ordered Dictionary to add the tracks in the tool form order
147 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) 133 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
148 134
149 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): 135 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
153 #trackHub.createZip() 139 #trackHub.createZip()
154 140
155 # We terminate the process and so create an HTML file summarizing all the files 141 # We terminate the process and so create an HTML file summarizing all the files
156 trackHub.terminate() 142 trackHub.terminate()
157 143
144 print "\t"
145 print "--------------"
146 print "Well done guys! Your data are ready to be displayed in UCSC Track Hub."
147
158 sys.exit(0) 148 sys.exit(0)
149
159 150
160 def sanitize_name_input(string_to_sanitize): 151 def sanitize_name_input(string_to_sanitize):
161 return string_to_sanitize \ 152 return string_to_sanitize \
162 .replace("/", "_") \ 153 .replace("/", "_") \
163 .replace(" ", "_") 154 .replace(" ", "_")
155
164 156
165 def sanitize_name_inputs(inputs_data): 157 def sanitize_name_inputs(inputs_data):
166 """ 158 """
167 Sometimes the output from Galaxy, or even just the file name from the user, has spaces 159 Sometimes the output from Galaxy, or even just the file name from the user, has spaces
168 Also, it can contain the '/' character, which could break the use of os.path functions 160 Also, it can contain the '/' character, which could break the use of os.path functions
171 """ 163 """
172 for key in inputs_data: 164 for key in inputs_data:
173 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) 165 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
174 166
175 167
176 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): 168 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data):
177 """ 169 """
178 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub 170 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub
179 and update the dictionary of datatype 171 and update the dictionary of datatype
180 :param ExtensionClass: T <= Datatype 172 :param ExtensionClass: T <= Datatype
181 :param array_inputs: list[string] 173 :param array_inputs: list[string]
187 # TODO: Optimize this double loop 179 # TODO: Optimize this double loop
188 for input_false_path in array_inputs: 180 for input_false_path in array_inputs:
189 for key, data_value in inputs_data.items(): 181 for key, data_value in inputs_data.items():
190 if key == input_false_path: 182 if key == input_false_path:
191 extensionObject = ExtensionClass(input_false_path, data_value) 183 extensionObject = ExtensionClass(input_false_path, data_value)
192
193 datatype_dictionary.update({data_value["order_index"]: extensionObject}) 184 datatype_dictionary.update({data_value["order_index"]: extensionObject})
194 all_datatype_dictionary.update(datatype_dictionary) 185 return datatype_dictionary
195 186
196 if __name__ == "__main__": 187 if __name__ == "__main__":
197 main(sys.argv) 188 main(sys.argv)