Mercurial > repos > rmarenco > hubarchivecreator
comparison hubArchiveCreator.py @ 10:acc233161f50 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author | rmarenco |
---|---|
date | Thu, 21 Jul 2016 05:58:51 -0400 |
parents | 4f9847539a28 |
children | d05236b15f81 |
comparison
equal
deleted
inserted
replaced
9:4f9847539a28 | 10:acc233161f50 |
---|---|
12 import collections | 12 import collections |
13 import json | 13 import json |
14 import sys | 14 import sys |
15 | 15 |
16 # Internal dependencies | 16 # Internal dependencies |
17 from TrackHub import TrackHub | |
18 from Gff3 import Gff3 | |
19 from Bam import Bam | 17 from Bam import Bam |
20 from BedSimpleRepeats import BedSimpleRepeats | 18 from BedSimpleRepeats import BedSimpleRepeats |
21 from Bed import Bed | 19 from Bed import Bed |
22 from BigWig import BigWig | 20 from BigWig import BigWig |
21 from util.Fasta import Fasta | |
22 from Gff3 import Gff3 | |
23 from Gtf import Gtf | 23 from Gtf import Gtf |
24 from TrackHub import TrackHub | |
24 | 25 |
25 | 26 |
26 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort | 27 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort |
27 | 28 |
28 | 29 |
60 help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive') | 61 help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive') |
61 parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive') | 62 parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive') |
62 | 63 |
63 parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') | 64 parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') |
64 | 65 |
66 parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation') | |
67 | |
68 parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID') | |
69 | |
65 ucsc_tools_path = '' | 70 ucsc_tools_path = '' |
66 | 71 |
67 toolDirectory = '.' | 72 toolDirectory = '.' |
68 extra_files_path = '.' | 73 extra_files_path = '.' |
69 | 74 |
70 # Get the args passed in parameter | 75 # Get the args passed in parameter |
71 args = parser.parse_args() | 76 args = parser.parse_args() |
72 | 77 |
73 input_fasta_file = args.fasta | 78 array_inputs_reference_genome = json.loads(args.fasta) |
79 | |
80 # TODO: Replace these with the object Fasta | |
81 input_fasta_file = array_inputs_reference_genome["false_path"] | |
82 input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) | |
83 genome_name = sanitize_name_input(args.genome_name) | |
84 | |
85 reference_genome = Fasta(array_inputs_reference_genome["false_path"], | |
86 input_fasta_file_name, genome_name) | |
87 | |
88 user_email = args.user_email | |
74 | 89 |
75 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated | 90 # TODO: Add array for each input because we can add multiple -b for example + filter the data associated |
76 | |
77 | 91 |
78 array_inputs_gff3 = args.gff3 | 92 array_inputs_gff3 = args.gff3 |
79 array_inputs_bed_simple_repeats = args.bedSimpleRepeats | 93 array_inputs_bed_simple_repeats = args.bedSimpleRepeats |
80 array_inputs_bed_generic = args.bed | 94 array_inputs_bed_generic = args.bed |
81 array_inputs_gtf = args.gtf | 95 array_inputs_gtf = args.gtf |
94 if args.directory: | 108 if args.directory: |
95 toolDirectory = args.directory | 109 toolDirectory = args.directory |
96 if args.extra_files_path: | 110 if args.extra_files_path: |
97 extra_files_path = args.extra_files_path | 111 extra_files_path = args.extra_files_path |
98 | 112 |
99 # TODO: Check here all the binaries / tools we need. Exception is missing | 113 # TODO: Check here all the binaries / tools we need. Exception if missing |
100 | 114 |
101 # Create the Track Hub folder | 115 # Create the Track Hub folder |
102 trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory) | 116 trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) |
103 | 117 |
104 all_datatype_dictionary = {} | 118 all_datatype_dictionary = {} |
105 | 119 |
120 datatype_parameters = (inputs_data, all_datatype_dictionary) | |
121 | |
106 # Process Augustus | 122 # Process Augustus |
107 if array_inputs_gff3: | 123 if array_inputs_gff3: |
108 create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file, | 124 create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) |
109 extra_files_path, all_datatype_dictionary, toolDirectory) | |
110 | 125 |
111 # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig | 126 # Process Bed simple repeats |
112 if array_inputs_bed_simple_repeats: | 127 if array_inputs_bed_simple_repeats: |
113 create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file, | 128 create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters) |
114 extra_files_path, all_datatype_dictionary, toolDirectory) | |
115 | 129 |
116 # Process a Bed => tBlastN or TopHat | 130 # Process Bed |
117 if array_inputs_bed_generic: | 131 if array_inputs_bed_generic: |
118 create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file, | 132 create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters) |
119 extra_files_path, all_datatype_dictionary, toolDirectory) | |
120 | 133 |
121 # Process a GTF => Tophat | 134 # Process GTF |
122 if array_inputs_gtf: | 135 if array_inputs_gtf: |
123 create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file, | 136 create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters) |
124 extra_files_path, all_datatype_dictionary, toolDirectory) | |
125 | 137 |
126 # Process a Bam => Tophat | 138 # Process Bam |
127 if array_inputs_bam: | 139 if array_inputs_bam: |
128 create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file, | 140 create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters) |
129 extra_files_path, all_datatype_dictionary, toolDirectory) | |
130 | 141 |
131 # Process a BigWig => From Bam | 142 # Process BigWig |
132 if array_inputs_bigwig: | 143 if array_inputs_bigwig: |
133 create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file, | 144 create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters) |
134 extra_files_path, all_datatype_dictionary, toolDirectory) | |
135 | 145 |
136 # Create Ordered Dictionary to add the tracks in the tool form order | 146 # Create Ordered Dictionary to add the tracks in the tool form order |
137 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) | 147 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) |
138 | 148 |
139 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): | 149 for index, datatypeObject in all_datatype_ordered_dictionary.iteritems(): |
145 # We terminate the process and so create an HTML file summarizing all the files | 155 # We terminate the process and so create an HTML file summarizing all the files |
146 trackHub.terminate() | 156 trackHub.terminate() |
147 | 157 |
148 sys.exit(0) | 158 sys.exit(0) |
149 | 159 |
160 def sanitize_name_input(string_to_sanitize): | |
161 return string_to_sanitize \ | |
162 .replace("/", "_") \ | |
163 .replace(" ", "_") | |
150 | 164 |
151 def sanitize_name_inputs(inputs_data): | 165 def sanitize_name_inputs(inputs_data): |
152 """ | 166 """ |
153 Sometimes outputs from Galaxy, or even just file names from the user, contain spaces. | 167 Sometimes outputs from Galaxy, or even just file names from the user, contain spaces. |
154 They can also contain the '/' character, which could break the use of os.path functions. | 168 They can also contain the '/' character, which could break the use of os.path functions. |
155 :param inputs_data: dict[string, dict[string, string]] | 169 :param inputs_data: dict[string, dict[string, string]] |
156 :return: | 170 :return: |
157 """ | 171 """ |
158 for key in inputs_data: | 172 for key in inputs_data: |
159 inputs_data[key]["name"] = inputs_data[key]["name"]\ | 173 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) |
160 .replace("/", "_")\ | |
161 .replace(" ", "_") | |
162 | 174 |
163 | 175 |
164 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file, | 176 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): |
165 extra_files_path, all_datatype_dictionary, tool_directory): | |
166 """ | 177 """ |
167 Function which executes the creation of all the necessary files / folders for a specific Datatype, for TrackHub, | 178 Function which executes the creation of all the necessary files / folders for a specific Datatype, for TrackHub, |
168 and updates the dictionary of datatypes | 179 and updates the dictionary of datatypes |
169 :param ExtensionClass: T <= Datatype | 180 :param ExtensionClass: T <= Datatype |
170 :param array_inputs: list[string] | 181 :param array_inputs: list[string] |
171 :param inputs_data: | 182 :param inputs_data: |
172 :param input_fasta_file: string | |
173 :param extra_files_path: string | |
174 :param tool_directory; string | |
175 """ | 183 """ |
176 | 184 |
177 datatype_dictionary = {} | 185 datatype_dictionary = {} |
178 | 186 |
179 # TODO: Optimize this double loop | 187 # TODO: Optimize this double loop |
180 for input_false_path in array_inputs: | 188 for input_false_path in array_inputs: |
181 for key, data_value in inputs_data.items(): | 189 for key, data_value in inputs_data.items(): |
182 if key == input_false_path: | 190 if key == input_false_path: |
183 extensionObject = ExtensionClass(input_false_path, data_value, | 191 extensionObject = ExtensionClass(input_false_path, data_value) |
184 input_fasta_file, extra_files_path, tool_directory) | 192 |
185 datatype_dictionary.update({data_value["order_index"]: extensionObject}) | 193 datatype_dictionary.update({data_value["order_index"]: extensionObject}) |
186 all_datatype_dictionary.update(datatype_dictionary) | 194 all_datatype_dictionary.update(datatype_dictionary) |
187 | 195 |
188 if __name__ == "__main__": | 196 if __name__ == "__main__": |
189 main(sys.argv) | 197 main(sys.argv) |