Mercurial > repos > rmarenco > hubarchivecreator
comparison hubArchiveCreator.py @ 11:d05236b15f81 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3760d0c8353b924ecf994131a5c2eb381aa81fb2
author | rmarenco |
---|---|
date | Wed, 27 Jul 2016 10:10:49 -0400 |
parents | acc233161f50 |
children | 25809f699cb3 |
comparison
equal
deleted
inserted
replaced
10:acc233161f50 | 11:d05236b15f81 |
---|---|
80 # TODO: Replace these with the object Fasta | 80 # TODO: Replace these with the object Fasta |
81 input_fasta_file = array_inputs_reference_genome["false_path"] | 81 input_fasta_file = array_inputs_reference_genome["false_path"] |
82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) | 82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) |
83 genome_name = sanitize_name_input(args.genome_name) | 83 genome_name = sanitize_name_input(args.genome_name) |
84 | 84 |
85 reference_genome = Fasta(array_inputs_reference_genome["false_path"], | 85 reference_genome = Fasta(input_fasta_file, |
86 input_fasta_file_name, genome_name) | 86 input_fasta_file_name, genome_name) |
87 | 87 |
88 user_email = args.user_email | 88 user_email = args.user_email |
89 | 89 |
90 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated | |
91 | 90 |
91 # TODO: Use a class to have a better management of the structure of these inputs | |
92 # These inputs are populated in the Galaxy Wrapper xml and are in this format: | |
93 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] | |
94 # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH} | |
92 array_inputs_gff3 = args.gff3 | 95 array_inputs_gff3 = args.gff3 |
93 array_inputs_bed_simple_repeats = args.bedSimpleRepeats | 96 array_inputs_bed_simple_repeats = args.bedSimpleRepeats |
94 array_inputs_bed_generic = args.bed | 97 array_inputs_bed_generic = args.bed |
95 array_inputs_gtf = args.gtf | 98 array_inputs_gtf = args.gtf |
96 array_inputs_bam = args.bam | 99 array_inputs_bam = args.bam |
115 # Create the Track Hub folder | 118 # Create the Track Hub folder |
116 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) | 119 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) |
117 | 120 |
118 all_datatype_dictionary = {} | 121 all_datatype_dictionary = {} |
119 | 122 |
120 datatype_parameters = (inputs_data, all_datatype_dictionary) | 123 for (inputs, datatype_class) in [(array_inputs_gff3, Gff3), |
121 | 124 (array_inputs_bed_simple_repeats, BedSimpleRepeats), |
122 # Process Augustus | 125 (array_inputs_bed_generic, Bed), |
123 if array_inputs_gff3: | 126 (array_inputs_gtf, Gtf), |
124 create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) | 127 (array_inputs_bam, Bam), |
125 | 128 (array_inputs_bigwig, BigWig)]: |
126 # Process Bed simple repeats | 129 if inputs: |
127 if array_inputs_bed_simple_repeats: | 130 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) |
128 create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters) | |
129 | |
130 # Process Bed | |
131 if array_inputs_bed_generic: | |
132 create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters) | |
133 | |
134 # Process GTF | |
135 if array_inputs_gtf: | |
136 create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters) | |
137 | |
138 # Process Bam | |
139 if array_inputs_bam: | |
140 create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters) | |
141 | |
142 # Process BigWig | |
143 if array_inputs_bigwig: | |
144 create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters) | |
145 | 131 |
146 # Create Ordered Dictionary to add the tracks in the tool form order | 132 # Create Ordered Dictionary to add the tracks in the tool form order |
147 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) | 133 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) |
148 | 134 |
149 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): | 135 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): |
153 #trackHub.createZip() | 139 #trackHub.createZip() |
154 | 140 |
155 # We terminate the process and so create an HTML file summarizing all the files | 141 # We terminate the process and so create an HTML file summarizing all the files |
156 trackHub.terminate() | 142 trackHub.terminate() |
157 | 143 |
144 print "\t" | |
145 print "--------------" | |
146 print "Well done guys! Your data are ready to be displayed in UCSC Track Hub." | |
147 | |
158 sys.exit(0) | 148 sys.exit(0) |
149 | |
159 | 150 |
160 def sanitize_name_input(string_to_sanitize): | 151 def sanitize_name_input(string_to_sanitize): |
161 return string_to_sanitize \ | 152 return string_to_sanitize \ |
162 .replace("/", "_") \ | 153 .replace("/", "_") \ |
163 .replace(" ", "_") | 154 .replace(" ", "_") |
155 | |
164 | 156 |
165 def sanitize_name_inputs(inputs_data): | 157 def sanitize_name_inputs(inputs_data): |
166 """ | 158 """ |
167 Sometimes output from Galaxy, or even just a file name from the user, has spaces | 159 Sometimes output from Galaxy, or even just a file name from the user, has spaces |
168 Also, it can contain the '/' character, which could break the use of os.path functions | 160 Also, it can contain the '/' character, which could break the use of os.path functions |
171 """ | 163 """ |
172 for key in inputs_data: | 164 for key in inputs_data: |
173 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) | 165 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) |
174 | 166 |
175 | 167 |
176 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): | 168 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data): |
177 """ | 169 """ |
178 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub | 170 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub |
179 and update the dictionary of datatype | 171 and update the dictionary of datatype |
180 :param ExtensionClass: T <= Datatype | 172 :param ExtensionClass: T <= Datatype |
181 :param array_inputs: list[string] | 173 :param array_inputs: list[string] |
187 # TODO: Optimize this double loop | 179 # TODO: Optimize this double loop |
188 for input_false_path in array_inputs: | 180 for input_false_path in array_inputs: |
189 for key, data_value in inputs_data.items(): | 181 for key, data_value in inputs_data.items(): |
190 if key == input_false_path: | 182 if key == input_false_path: |
191 extensionObject = ExtensionClass(input_false_path, data_value) | 183 extensionObject = ExtensionClass(input_false_path, data_value) |
192 | |
193 datatype_dictionary.update({data_value["order_index"]: extensionObject}) | 184 datatype_dictionary.update({data_value["order_index"]: extensionObject}) |
194 all_datatype_dictionary.update(datatype_dictionary) | 185 return datatype_dictionary |
195 | 186 |
196 if __name__ == "__main__": | 187 if __name__ == "__main__": |
197 main(sys.argv) | 188 main(sys.argv) |