comparison TrackHub.py @ 0:f493979f1408 draft default tip

planemo upload for repository https://github.com/Yating-L/hubarchivecreator-test commit 48b59e91e2dcc2e97735ee35d587960cbfbce932-dirty
author yating-l
date Wed, 21 Dec 2016 12:13:04 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f493979f1408
1 #!/usr/bin/python
2 # -*- coding: utf8 -*-
3
4 import logging
5 import os
6 import tempfile
7 import shutil
8 import zipfile
9
10 # Internal dependencies
11 from Datatype import Datatype
12 from util import subtools
13
14 from mako.lookup import TemplateLookup
15
16
class TrackHub(object):
    """Create and populate the on-disk structure of an Assembly Hub.

    Instantiating the class builds the hub skeleton under
    ``<extra_files_path>/myHub`` (2bit file, ``genomes.txt``, ``hub.txt``,
    empty description pages and the ``tracks`` folder) and initialises
    ``Datatype`` with the resulting paths. Tracks and groups are then appended
    with :meth:`addTrack` / :meth:`addGroup`, and :meth:`terminate` writes the
    HTML listing of every generated file to ``outputFile``.

    All Mako templates are rendered with ``output_encoding='utf-8'``; the
    rendered output is therefore written to files opened in binary mode.
    """

    def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
        """Build the hub skeleton on disk and prepare the templates.

        :param inputFastaFile: reference genome object; its ``assembly_id`` and
            ``false_path`` attributes are read by this class.
        :param user_email: e-mail address written into ``hub.txt``.
        :param outputFile: path of the HTML listing written by terminate().
        :param extra_files_path: directory in which ``myHub`` is created.
        :param tool_directory: directory containing the ``templates`` folder.
        """
        super(TrackHub, self).__init__()

        self.tool_directory = tool_directory
        self.extra_files_path = extra_files_path
        self.outputFile = outputFile
        self.user_email = user_email

        self.reference_genome = inputFastaFile
        # TODO: Add the specie name
        self.genome_name = inputFastaFile.assembly_id
        self.specie_html = self.genome_name + '.html'

        # Set containing the groups already added. Updated by addGroup()
        self.groups = set()

        # Archive handle used by createZip(); opened lazily on first use
        self.outputZip = None

        # TODO: Modify according to the files passed in parameter
        # ---- Templates ----
        self.trackDbTemplate = self._get_template('templates/trackDb', 'layout.txt')
        self.groupsTemplate = self._get_template('templates/groupsTxt', 'layout.txt')
        # ---- End Templates ----

        # The attributes below are all filled in by __createAssemblyHub__()
        # TODO: Merge the following processing into a function as it is also used in twoBitCreator
        self.mySpecieFolderPath = None
        self.myTracksFolderPath = None
        self.twoBitName = None
        self.two_bit_final_path = None
        self.chromSizesFile = None
        self.default_pos = None

        # Set all the missing variables of this class, and create physically the folders/files
        self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)

        # Init the Datatype
        Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
                          self.extra_files_path, self.tool_directory,
                          self.mySpecieFolderPath, self.myTracksFolderPath)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _get_template(self, template_dir, template_name):
        """Return the Mako template ``template_name`` from ``<tool_directory>/<template_dir>``."""
        lookup = TemplateLookup(directories=[os.path.join(self.tool_directory, template_dir)],
                                output_encoding='utf-8', encoding_errors='replace')
        return lookup.get_template(template_name)

    @staticmethod
    def _write_rendered(file_path, rendered, mode):
        """Write rendered template output to ``file_path`` opened with the binary ``mode``."""
        if not isinstance(rendered, bytes):
            # Defensive: a template rendered without an output encoding yields text
            rendered = rendered.encode('utf-8')
        with open(file_path, mode) as output_file:
            output_file.write(rendered)

    @staticmethod
    def _ensure_directory(directory_path):
        """Create ``directory_path`` (and parents) unless it is already a directory."""
        try:
            os.makedirs(directory_path)
        except OSError:
            # Already existing is fine; anything else (permissions, a file in the way) is not
            if not os.path.isdir(directory_path):
                raise

    @staticmethod
    def _create_empty_file(file_path):
        """Create ``file_path`` as an empty file, truncating it if it already exists."""
        with open(file_path, 'w'):
            pass

    def _build_walkable_tree(self):
        """Return the tree of every file found under ``extra_files_path``.

        Each key is one path component and maps to a tuple
        ``(children_dict, relative_path)`` where ``relative_path`` is the
        '/'-joined path of that component relative to ``extra_files_path``.
        Files located directly in ``extra_files_path`` end up under a ``'.'`` key.
        """
        walkable_tree = {}
        for root, _dirs, filenames in os.walk(self.extra_files_path):
            relative_directory = os.path.relpath(root, self.extra_files_path)
            for filename in filenames:
                components = os.path.join(relative_directory, filename).split(os.sep)
                node = walkable_tree
                for depth, component in enumerate(components):
                    if component not in node:
                        node[component] = ({}, '/'.join(components[:depth + 1]))
                    # Descend into the children of this component
                    node = node[component][0]
        return walkable_tree

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def createZip(self):
        """Archive every file of the hub into a zip file, then close the archive.

        If ``self.outputZip`` has not been set by the caller, the archive is
        created at ``<extra_files_path>/myHub.zip``. Entries are stored relative
        to the parent of the hub root (e.g. ``myHub/hub.txt``) so that the
        archive does not embed filesystem paths of the machine that built it.
        """
        archive = getattr(self, 'outputZip', None)
        if archive is None:
            archive = zipfile.ZipFile(os.path.join(self.extra_files_path, 'myHub.zip'), 'w',
                                      zipfile.ZIP_DEFLATED, allowZip64=True)
            self.outputZip = archive

        archive_base = os.path.dirname(os.path.normpath(self.rootAssemblyHub))
        try:
            for root, _dirs, filenames in os.walk(self.rootAssemblyHub):
                for filename in filenames:
                    file_path = os.path.join(root, filename)
                    archive.write(file_path, os.path.relpath(file_path, archive_base))
        finally:
            archive.close()
            # A closed archive cannot be reused; a later call opens a fresh one
            self.outputZip = None

    def addTrack(self, trackDbObject=None):
        """Append ``trackDbObject`` to ``trackDb.txt`` and register its group.

        ``trackDb.txt`` lives in the specie folder and is created on first use.

        :param trackDbObject: track description; its ``group_name`` and
            ``trackName`` attributes are read here and the object itself is
            handed to the trackDb template.
        """
        # TODO: Get this out of the function
        trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, 'trackDb.txt')

        # TODO: The addGroup does not belong here. Move it when the group becomes more than just a label
        self.addGroup(trackDbObject.group_name)

        rendered = self.trackDbTemplate.render(trackDbs=[trackDbObject])
        self._write_rendered(trackDbTxtFilePath, rendered, 'ab')

        logging.debug("We just added track {0} (in group {1})".format(
            trackDbObject.trackName,
            trackDbObject.group_name.lower().replace(' ', '_')))

    def addGroup(self, group_name="Default"):
        """Append ``group_name`` to ``groups.txt`` unless it was already added.

        :param group_name: label of the group, passed to the groups template.
        """
        # TODO: Get this out of the function
        groupsTxtFilePath = os.path.join(self.mySpecieFolderPath, 'groups.txt')

        # If the group is already present, we don't need to add it
        if group_name in self.groups:
            logging.debug("We DON'T add in {0} the group {1}".format(groupsTxtFilePath,
                                                                     group_name))
            return

        rendered = self.groupsTemplate.render(label=group_name)
        self._write_rendered(groupsTxtFilePath, rendered, 'ab')
        logging.debug("We just added in {0} the group {1}".format(groupsTxtFilePath,
                                                                  group_name))
        self.groups.add(group_name)

    def terminate(self):
        """Write to ``outputFile`` the HTML listing of all files under ``extra_files_path``."""
        # TODO: Create a class to handle the file object
        display_template = self._get_template('templates', 'display.txt')

        with open(self.outputFile, 'wb') as htmlOutput:
            # TODO: We are basically looping two times: One time with os.walk, Second time
            # with the template. We could improve that if the number of files begins to be really important
            # TODO: Manage also the links of the directories => No link?
            #       => Managed in display.txt, but could also be managed there
            rendered = display_template.render(walkable_tree=self._build_walkable_tree())
            if not isinstance(rendered, bytes):
                rendered = rendered.encode('utf-8')
            htmlOutput.write(rendered)

    # ------------------------------------------------------------------
    # Hub skeleton creation
    # ------------------------------------------------------------------
    def __createAssemblyHub__(self, extra_files_path):
        """Create the hub folders and base files; return the hub root path.

        Sets ``chromSizesFile``, ``default_pos``, ``mySpecieFolderPath``,
        ``twoBitName``, ``two_bit_final_path`` and ``myTracksFolderPath``.

        :param extra_files_path: directory in which ``myHub`` is created.
        :raises IndexError: if the sorted chrom sizes file has an empty first line.
        """
        # mode and buffering are passed positionally: the second parameter is
        # named ``bufsize`` on Python 2 and ``buffering`` on Python 3.
        twoBitFile = tempfile.NamedTemporaryFile('w+b', 0)
        twoBitInfoFile = tempfile.NamedTemporaryFile('w+b', 0)
        try:
            # 2bit file creation from input fasta
            subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)

            # Generate the twoBitInfo
            subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)

            # Then we get the output to generate the chromSizes.
            # Kept open on the instance: the file object is handed to Datatype.pre_init()
            self.chromSizesFile = tempfile.NamedTemporaryFile('w+b', 0, suffix=".chrom.sizes")
            subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)

            # The default position is the first field of the first line of the sorted file
            # (presumably the biggest scaffold -- depends on subtools.sortChromSizes ordering)
            with open(self.chromSizesFile.name, 'r') as chrom_sizes:
                # TODO: Check if exists
                self.default_pos = chrom_sizes.readline().split()[0]

            # TODO: Manage to put every fill Function in a file dedicated for reading reasons
            # Create the root directory
            myHubPath = os.path.join(extra_files_path, "myHub")
            self._ensure_directory(myHubPath)

            # Create the specie folder
            # TODO: Generate the name depending on the specie
            mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
            self._ensure_directory(mySpecieFolderPath)
            self.mySpecieFolderPath = mySpecieFolderPath

            # Copy the 2bit file into the specie folder before the temp file is released
            self.twoBitName = self.genome_name + ".2bit"
            self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
            shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
        finally:
            # Closing a NamedTemporaryFile also deletes it
            twoBitInfoFile.close()
            twoBitFile.close()

        # Add the genomes.txt file
        self.__fillGenomesTxt__(os.path.join(myHubPath, 'genomes.txt'))

        # Add the hub.txt file
        self.__fillHubTxt__(os.path.join(myHubPath, 'hub.txt'))

        # Add the hub html file (referenced as descriptionUrl in hub.txt)
        self.__fillHubHtmlFile__(os.path.join(myHubPath, self.specie_html))

        # Create the description html file in the specie folder
        self.__fillDescriptionHtmlFile__(os.path.join(mySpecieFolderPath, 'description.html'))

        # groups.txt is not created here: addGroup() creates it on demand
        # TODO: If not inputs for this, do no create the file

        # Create the folder tracks into the specie folder
        tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
        self._ensure_directory(tracksFolderPath)
        self.myTracksFolderPath = tracksFolderPath

        return myHubPath

    def __fillGenomesTxt__(self, genomesTxtFilePath):
        """Render the genomesAssembly template into ``genomesTxtFilePath``.

        Every path handed to the template is relative to the hub root.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        genomes_template = self._get_template('templates/genomesAssembly', 'layout.txt')
        rendered = genomes_template.render(
            genomeName=self.genome_name,
            trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
            groupsPath=os.path.join(self.genome_name, "groups.txt"),
            genomeDescription=self.genome_name,
            twoBitPath=os.path.join(self.genome_name, self.twoBitName),
            organismName=self.genome_name,
            defaultPosition=self.default_pos,
            orderKey="4500",
            scientificName=self.genome_name,
            pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
        )
        self._write_rendered(genomesTxtFilePath, rendered, 'wb')

    def __fillHubTxt__(self, hubTxtFilePath):
        """Render the hubTxt template into ``hubTxtFilePath``."""
        # TODO: Think about the inputs and outputs
        # TODO: Manage the template of this file
        hub_template = self._get_template('templates/hubTxt', 'layout.txt')
        rendered = hub_template.render(
            hubName=(''.join(['gonramp', self.genome_name.title()])),
            shortLabel=self.genome_name,
            longLabel=self.genome_name,
            genomesFile='genomes.txt',
            email=self.user_email,
            descriptionUrl=self.specie_html
        )
        self._write_rendered(hubTxtFilePath, rendered, 'wb')

    def __fillHubHtmlFile__(self, hubHtmlFilePath):
        """Create ``hubHtmlFilePath`` as an empty file.

        The content is intentionally left empty for now: the previous
        implementation rendered ``templates/hubDescription/layout.txt`` with
        hard-coded placeholder values and never wrote the result.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Render templates/hubDescription with real values once they are available
        self._create_empty_file(hubHtmlFilePath)

    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
        """Create ``descriptionHtmlFilePath`` as an empty file.

        The content is intentionally left empty for now: the previous
        implementation rendered ``templates/specieDescription/layout.txt`` with
        a hard-coded placeholder description and never wrote the result.
        """
        # TODO: Think about the inputs and outputs
        # TODO: Render templates/specieDescription with a real description once available
        self._create_empty_file(descriptionHtmlFilePath)

    def __fillGroupsTxtFile__(self, groupsTxtFilePath):
        """Create ``groupsTxtFilePath`` as an empty file, truncating existing content.

        Currently unused by this class: groups are appended by addGroup().
        """
        # TODO: Reenable this function at some point (templates/groupsTxt/layout.txt)
        self._create_empty_file(groupsTxtFilePath)