Mercurial > repos > rmarenco > hubarchivecreator
comparison hubArchiveCreator.py @ 1:fb5e60d4d18a draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
author | rmarenco |
---|---|
date | Wed, 13 Jul 2016 13:36:37 -0400 |
parents | |
children | fcff8e9146e7 |
comparison
equal
deleted
inserted
replaced
0:0f3bc17e5ede | 1:fb5e60d4d18a |
---|---|
1 #!/usr/bin/python | |
2 # -*- coding: utf8 -*- | |
3 | |
4 """ | |
5 This Galaxy tool prepares your files so that they are ready for | |
6 Assembly Hub visualization. | |
7 Program test arguments: | |
8 hubArchiveCreator.py --gff3 test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html | |
9 """ | |
10 | |
11 import argparse | |
12 import collections | |
13 import json | |
14 import sys | |
15 | |
16 # Internal dependencies | |
17 from TrackHub import TrackHub | |
18 from Gff3 import Gff3 | |
19 from Bam import Bam | |
20 from BedSimpleRepeats import BedSimpleRepeats | |
21 from Bed import Bed | |
22 from BigWig import BigWig | |
23 from Gtf import Gtf | |
24 | |
25 | |
26 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort] | |
27 | |
28 | |
def main(argv):
    """
    Build a Track Hub archive from the command-line inputs.

    Parses the options, creates the TrackHub, instantiates one datatype object
    per input file described in the ``--data_json`` metadata, adds the
    resulting tracks to the TrackHub in ascending ``order_index`` order, then
    asks the TrackHub to create the zip and to terminate.

    :param argv: list[string], full command line including the program name
                 (as in sys.argv); when empty or None, sys.argv is used
    :return: does not return, the process exits with status 0 on success
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Prepare input files for an Assembly Hub visualization (Track Hub archive).')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Running tool directory, where to find the templates. Default is running directory')
    # NOTE: this option is accepted on the command line but its value is not used by this function
    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory where to find the executables needed to run this tool')
    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    # Required: the metadata is always decoded below, and json.loads(None) would crash
    parser.add_argument('-j', '--data_json', required=True,
                        help='Json containing the metadata of the inputs')

    # Parse the arguments that were actually handed to main (argv[0] is the program name)
    args = parser.parse_args(argv[1:] if argv else None)

    input_fasta_file = args.fasta
    outputFile = args.output

    # Both directories default to the running directory
    toolDirectory = args.directory or '.'
    extra_files_path = args.extra_files_path or '.'

    inputs_data = json.loads(args.data_json)
    # We remove the spaces in ["name"] of inputs_data
    sanitize_name_inputs(inputs_data)

    # TODO: Check here all the binaries / tools we need. Exception is missing

    # Create the Track Hub folder
    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)

    # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
    # Datatype class paired with the list of input paths given for it (None when the option is absent)
    datatype_inputs = (
        (Gff3, args.gff3),                          # Augustus
        (BedSimpleRepeats, args.bedSimpleRepeats),  # Tandem Repeats Finder / TrfBig
        (Bed, args.bed),                            # tBlastN or TopHat
        (Gtf, args.gtf),                            # Tophat
        (Bam, args.bam),                            # Tophat
        (BigWig, args.bigwig),                      # From Bam
    )

    all_datatype_dictionary = {}
    for ExtensionClass, array_inputs in datatype_inputs:
        if array_inputs:
            create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                            extra_files_path, all_datatype_dictionary, toolDirectory)

    # Add the tracks in the tool form order: a plain dict (even wrapped in an
    # OrderedDict) does not iterate in key order, so the order_index keys are
    # sorted explicitly.
    # NOTE(review): assumes all order_index values are mutually comparable (e.g. all ints) - confirm
    for order_index in sorted(all_datatype_dictionary):
        trackHub.addTrack(all_datatype_dictionary[order_index].track.trackDb)

    # We process all the modifications to create the zip file
    trackHub.createZip()

    # We terminate the process and so create a HTML file summarizing all the files
    trackHub.terminate()

    sys.exit(0)
156 | |
157 | |
def sanitize_name_inputs(inputs_data):
    """
    Replace, in place, every space by an underscore in the "name" of each input.

    Names coming out of Galaxy, or file names chosen by the user, may contain spaces.
    :param inputs_data: dict[string, dict[string, string]]
    :return: None, inputs_data is modified in place
    """
    for metadata in inputs_data.values():
        current_name = metadata["name"]
        metadata["name"] = current_name.replace(" ", "_")
166 | |
167 | |
def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                    extra_files_path, all_datatype_dictionary, tool_directory):
    """
    Instantiate ExtensionClass for every input path which has metadata, and
    store each instance in all_datatype_dictionary under its "order_index".

    Paths of array_inputs without an entry in inputs_data are skipped.
    :param ExtensionClass: T <= Datatype, called as
        ExtensionClass(path, metadata, input_fasta_file, extra_files_path, tool_directory)
    :param array_inputs: list[string], input paths (None or empty: nothing is done)
    :param inputs_data: dict[string, dict], metadata keyed by input path; each
        matched entry must contain "order_index"
    :param input_fasta_file: string
    :param extra_files_path: string
    :param all_datatype_dictionary: dict, updated in place with {order_index: instance}
    :param tool_directory: string
    :return: None
    """
    if not array_inputs:
        return

    for input_false_path in array_inputs:
        # Direct lookup by path instead of scanning every key of inputs_data
        if input_false_path not in inputs_data:
            continue
        data_value = inputs_data[input_false_path]
        all_datatype_dictionary[data_value["order_index"]] = ExtensionClass(
            input_false_path, data_value, input_fasta_file, extra_files_path, tool_directory)
191 | |
# Script entry point: hand the raw command line (program name included) to main().
if __name__ == "__main__":
    main(sys.argv)