comparison hubArchiveCreator.py @ 1:fb5e60d4d18a draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
author rmarenco
date Wed, 13 Jul 2016 13:36:37 -0400
parents
children fcff8e9146e7
comparison
equal deleted inserted replaced
0:0f3bc17e5ede 1:fb5e60d4d18a
1 #!/usr/bin/python
2 # -*- coding: utf8 -*-
3
4 """
This Galaxy tool prepares your files for Assembly Hub visualization.
Program test arguments (the -j/--data_json inputs metadata is also needed,
because it is always parsed):
hubArchiveCreator.py --gff3 test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html -j '<inputs metadata JSON>'
9 """
10
11 import argparse
12 import collections
13 import json
14 import sys
15
16 # Internal dependencies
17 from TrackHub import TrackHub
18 from Gff3 import Gff3
19 from Bam import Bam
20 from BedSimpleRepeats import BedSimpleRepeats
21 from Bed import Bed
22 from BigWig import BigWig
23 from Gtf import Gtf
24
25
# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort]
27
28
def main(argv):
    """
    Command line entry point: build a Track Hub archive from the given inputs.

    Parses the options, creates the TrackHub, builds one datatype object per
    input file that has an entry in the -j/--data_json metadata, adds the
    tracks to the hub sorted by their "order_index", then asks the hub to
    create the zip and to terminate. Always ends with sys.exit(0).

    :param argv: list[string], full argument vector with the program name
                 first (as sys.argv). If empty or None, argparse reads
                 sys.argv itself.
    :return: never returns normally, the process exits with status 0
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(description='Create a foo.txt inside the given folder.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Running tool directory, where to find the templates. Default is running directory')
    # NOTE: -u is accepted for compatibility, but its value is not used in this function
    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory where to find the executables needed to run this tool')
    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # Parse the arguments handed to main (program name skipped) instead of
    # silently ignoring them; parse_args(None) falls back to sys.argv[1:]
    args = parser.parse_args(argv[1:] if argv else None)

    # json.loads(None) would only raise an obscure TypeError: report a usage error instead
    if args.data_json is None:
        parser.error('argument -j/--data_json is required')

    input_fasta_file = args.fasta
    outputFile = args.output

    # Both folders default to the running directory
    toolDirectory = args.directory or '.'
    extra_files_path = args.extra_files_path or '.'

    inputs_data = json.loads(args.data_json)
    # We remove the spaces in ["name"] of inputs_data
    sanitize_name_inputs(inputs_data)

    # TODO: Check here all the binaries / tools we need. Exception is missing

    # Create the Track Hub folder
    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)

    # order_index => datatype object, filled by create_ordered_datatype_objects
    all_datatype_dictionary = {}

    # One (datatype class, list of input paths) pair per supported format,
    # processed in this order. A list is None when its option was not given.
    datatype_inputs = (
        (Gff3, args.gff3),                          # Augustus
        (BedSimpleRepeats, args.bedSimpleRepeats),  # Tandem Repeats Finder / TrfBig
        (Bed, args.bed),                            # tBlastN or TopHat
        (Gtf, args.gtf),                            # Tophat
        (Bam, args.bam),                            # Tophat
        (BigWig, args.bigwig),                      # From Bam
    )
    for extension_class, array_inputs in datatype_inputs:
        if array_inputs:
            create_ordered_datatype_objects(extension_class, array_inputs, inputs_data, input_fasta_file,
                                            extra_files_path, all_datatype_dictionary, toolDirectory)

    # Add the tracks in the tool form order: a plain dict gives no ordering
    # guarantee, so sort explicitly on the order_index keys
    # (assumes the order_index values are mutually comparable, e.g. all integers)
    for order_index in sorted(all_datatype_dictionary):
        trackHub.addTrack(all_datatype_dictionary[order_index].track.trackDb)

    # We process all the modifications to create the zip file
    trackHub.createZip()

    # We terminate the process and so create a HTML file summarizing all the files
    trackHub.terminate()

    sys.exit(0)
156
157
def sanitize_name_inputs(inputs_data):
    """
    Replace, in place, every space of each entry's "name" by an underscore
    (names coming from Galaxy or from the user may contain spaces)
    :param inputs_data: dict[string, dict[string, string]]
    :return: None, inputs_data itself is modified
    """
    for metadata in inputs_data.values():
        metadata["name"] = metadata["name"].replace(" ", "_")
166
167
def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                    extra_files_path, all_datatype_dictionary, tool_directory):
    """
    Instantiate ExtensionClass for every input path which has an entry in inputs_data,
    and register each created object in all_datatype_dictionary under its "order_index".
    Paths without an entry in inputs_data are skipped.
    :param ExtensionClass: T <= Datatype, called as ExtensionClass(path, metadata, input_fasta_file,
                           extra_files_path, tool_directory)
    :param array_inputs: list[string], paths of the inputs of this datatype
    :param inputs_data: dict[string, dict], metadata of the inputs keyed by input path;
                        each used entry must contain "order_index"
    :param input_fasta_file: string
    :param extra_files_path: string
    :param all_datatype_dictionary: dict, updated in place with {order_index: datatype object}
    :param tool_directory: string
    :return: None
    """

    # Collected locally first, so all_datatype_dictionary is only touched
    # once every object of this datatype has been built
    datatype_dictionary = {}

    for input_false_path in array_inputs:
        # Direct lookup by key instead of scanning every item of inputs_data
        if input_false_path not in inputs_data:
            continue
        data_value = inputs_data[input_false_path]
        extensionObject = ExtensionClass(input_false_path, data_value,
                                         input_fasta_file, extra_files_path, tool_directory)
        datatype_dictionary[data_value["order_index"]] = extensionObject

    all_datatype_dictionary.update(datatype_dictionary)
191
# Run the tool only when this file is executed as a script, not when imported
if __name__ == '__main__':
    main(sys.argv)