Mercurial > repos > rmarenco > hubarchivecreator
comparison hubArchiveCreator.py @ 11:d05236b15f81 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3760d0c8353b924ecf994131a5c2eb381aa81fb2
| author | rmarenco |
|---|---|
| date | Wed, 27 Jul 2016 10:10:49 -0400 |
| parents | acc233161f50 |
| children | 25809f699cb3 |
comparison
equal
deleted
inserted
replaced
| 10:acc233161f50 | 11:d05236b15f81 |
|---|---|
| 80 # TODO: Replace these with the object Fasta | 80 # TODO: Replace these with the object Fasta |
| 81 input_fasta_file = array_inputs_reference_genome["false_path"] | 81 input_fasta_file = array_inputs_reference_genome["false_path"] |
| 82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) | 82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) |
| 83 genome_name = sanitize_name_input(args.genome_name) | 83 genome_name = sanitize_name_input(args.genome_name) |
| 84 | 84 |
| 85 reference_genome = Fasta(array_inputs_reference_genome["false_path"], | 85 reference_genome = Fasta(input_fasta_file, |
| 86 input_fasta_file_name, genome_name) | 86 input_fasta_file_name, genome_name) |
| 87 | 87 |
| 88 user_email = args.user_email | 88 user_email = args.user_email |
| 89 | 89 |
| 90 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated | |
| 91 | 90 |
| | 91 # TODO: Use a class to have a better management of the structure of these inputs |
| | 92 # These inputs are populated in the Galaxy Wrapper xml and are in this format: |
| | 93 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] |
| | 94 # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH} |
| 92 array_inputs_gff3 = args.gff3 | 95 array_inputs_gff3 = args.gff3 |
| 93 array_inputs_bed_simple_repeats = args.bedSimpleRepeats | 96 array_inputs_bed_simple_repeats = args.bedSimpleRepeats |
| 94 array_inputs_bed_generic = args.bed | 97 array_inputs_bed_generic = args.bed |
| 95 array_inputs_gtf = args.gtf | 98 array_inputs_gtf = args.gtf |
| 96 array_inputs_bam = args.bam | 99 array_inputs_bam = args.bam |
| 115 # Create the Track Hub folder | 118 # Create the Track Hub folder |
| 116 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) | 119 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) |
| 117 | 120 |
| 118 all_datatype_dictionary = {} | 121 all_datatype_dictionary = {} |
| 119 | 122 |
| 120 datatype_parameters = (inputs_data, all_datatype_dictionary) | 123 for (inputs, datatype_class) in [(array_inputs_gff3, Gff3), |
| 121 | 124 (array_inputs_bed_simple_repeats, BedSimpleRepeats), |
| 122 # Process Augustus | 125 (array_inputs_bed_generic, Bed), |
| 123 if array_inputs_gff3: | 126 (array_inputs_gtf, Gtf), |
| 124 create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) | 127 (array_inputs_bam, Bam), |
| 125 | 128 (array_inputs_bigwig, BigWig)]: |
| 126 # Process Bed simple repeats | 129 if inputs: |
| 127 if array_inputs_bed_simple_repeats: | 130 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) |
| 128 create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters) | |
| 129 | |
| 130 # Process Bed | |
| 131 if array_inputs_bed_generic: | |
| 132 create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters) | |
| 133 | |
| 134 # Process GTF | |
| 135 if array_inputs_gtf: | |
| 136 create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters) | |
| 137 | |
| 138 # Process Bam | |
| 139 if array_inputs_bam: | |
| 140 create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters) | |
| 141 | |
| 142 # Process BigWig | |
| 143 if array_inputs_bigwig: | |
| 144 create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters) | |
| 145 | 131 |
| 146 # Create Ordered Dictionary to add the tracks in the tool form order | 132 # Create Ordered Dictionary to add the tracks in the tool form order |
| 147 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) | 133 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) |
| 148 | 134 |
| 149 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): | 135 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): |
| 153 #trackHub.createZip() | 139 #trackHub.createZip() |
| 154 | 140 |
| 155 # We terminate le process and so create a HTML file summarizing all the files | 141 # We terminate le process and so create a HTML file summarizing all the files |
| 156 trackHub.terminate() | 142 trackHub.terminate() |
| 157 | 143 |
| | 144 print "\t" |
| | 145 print "--------------" |
| | 146 print "Well done guys! Your data are ready to be displayed in UCSC Track Hub." |
| | 147 |
| 158 sys.exit(0) | 148 sys.exit(0) |
| | 149 |
| 159 | 150 |
| 160 def sanitize_name_input(string_to_sanitize): | 151 def sanitize_name_input(string_to_sanitize): |
| 161 return string_to_sanitize \ | 152 return string_to_sanitize \ |
| 162 .replace("/", "_") \ | 153 .replace("/", "_") \ |
| 163 .replace(" ", "_") | 154 .replace(" ", "_") |
| | 155 |
| 164 | 156 |
| 165 def sanitize_name_inputs(inputs_data): | 157 def sanitize_name_inputs(inputs_data): |
| 166 """ | 158 """ |
| 167 Sometimes output from Galaxy, or even just file name from user have spaces | 159 Sometimes output from Galaxy, or even just file name from user have spaces |
| 168 Also, it can contain '/' character and could break the use of os.path function | 160 Also, it can contain '/' character and could break the use of os.path function |
| 171 """ | 163 """ |
| 172 for key in inputs_data: | 164 for key in inputs_data: |
| 173 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) | 165 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) |
| 174 | 166 |
| 175 | 167 |
| 176 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): | 168 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data): |
| 177 """ | 169 """ |
| 178 Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub | 170 Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub |
| 179 and update the dictionary of datatype | 171 and update the dictionary of datatype |
| 180 :param ExtensionClass: T <= Datatype | 172 :param ExtensionClass: T <= Datatype |
| 181 :param array_inputs: list[string] | 173 :param array_inputs: list[string] |
| 187 # TODO: Optimize this double loop | 179 # TODO: Optimize this double loop |
| 188 for input_false_path in array_inputs: | 180 for input_false_path in array_inputs: |
| 189 for key, data_value in inputs_data.items(): | 181 for key, data_value in inputs_data.items(): |
| 190 if key == input_false_path: | 182 if key == input_false_path: |
| 191 extensionObject = ExtensionClass(input_false_path, data_value) | 183 extensionObject = ExtensionClass(input_false_path, data_value) |
| 192 | |
| 193 datatype_dictionary.update({data_value["order_index"]: extensionObject}) | 184 datatype_dictionary.update({data_value["order_index"]: extensionObject}) |
| 194 all_datatype_dictionary.update(datatype_dictionary) | 185 return datatype_dictionary |
| 195 | 186 |
| 196 if __name__ == "__main__": | 187 if __name__ == "__main__": |
| 197 main(sys.argv) | 188 main(sys.argv) |
