comparison TrackHub.py @ 10:acc233161f50 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author rmarenco
date Thu, 21 Jul 2016 05:58:51 -0400
parents 4f9847539a28
children d05236b15f81
comparison
equal deleted inserted replaced
9:4f9847539a28 10:acc233161f50
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # -*- coding: utf8 -*- 2 # -*- coding: utf8 -*-
3 3
4 import os 4 import os
5 import tempfile
6 import shutil
5 import zipfile 7 import zipfile
8
9 # Internal dependencies
10 from Datatype import Datatype
11 from util import subtools
6 12
7 from mako.lookup import TemplateLookup 13 from mako.lookup import TemplateLookup
8 14
9 15
10 class TrackHub(object): 16 class TrackHub(object):
11 """docstring for TrackHub""" 17 """docstring for TrackHub"""
12 18
13 def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory): 19 def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
14 super(TrackHub, self).__init__() 20 super(TrackHub, self).__init__()
15 21
16 self.rootAssemblyHub = None 22 self.rootAssemblyHub = None
23
17 self.mySpecieFolderPath = None 24 self.mySpecieFolderPath = None
25 self.myTracksFolderPath = None
18 self.tool_directory = tool_directory 26 self.tool_directory = tool_directory
27
28 self.reference_genome = inputFastaFile
29 # TODO: Add the specie name
30 self.genome_name = inputFastaFile.assembly_id
31 self.default_pos = None
32 self.user_email = user_email
19 33
20 # TODO: Modify according to the files passed in parameter 34 # TODO: Modify according to the files passed in parameter
21 mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], 35 mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')],
22 output_encoding='utf-8', encoding_errors='replace') 36 output_encoding='utf-8', encoding_errors='replace')
23 self.trackDbTemplate = mylookup.get_template("layout.txt") 37 self.trackDbTemplate = mylookup.get_template("layout.txt")
24 38
25 self.extra_files_path = extra_files_path 39 self.extra_files_path = extra_files_path
26 self.outputFile = outputFile 40 self.outputFile = outputFile
27 41
28 inputFastaFile = open(inputFastaFile, 'r')
29 #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True)
30
31 # Create the structure of the Assembly Hub 42 # Create the structure of the Assembly Hub
32 # TODO: Merge the following processing into a function as it is also used in twoBitCreator 43 # TODO: Merge the following processing into a function as it is also used in twoBitCreator
33 baseNameFasta = os.path.basename(inputFastaFile.name) 44 self.twoBitName = None
34 suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) 45 self.two_bit_final_path = None
35 self.twoBitName = suffixTwoBit + '.2bit' 46 self.chromSizesFile = None
36 47
37 self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory, 48 self.default_pos = None
38 extra_files_path=extra_files_path) 49
50 # Set all the missing variables of this class, and physically create the folders/files
51 self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
52
53 # Init the Datatype
54 Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
55 self.extra_files_path, self.tool_directory,
56 self.mySpecieFolderPath, self.myTracksFolderPath)
39 57
40 def createZip(self): 58 def createZip(self):
41 for root, dirs, files in os.walk(self.rootAssemblyHub): 59 for root, dirs, files in os.walk(self.rootAssemblyHub):
42 # Get all files and construct the dir at the same time 60 # Get all files and construct the dir at the same time
43 for file in files: 61 for file in files:
58 ) 76 )
59 trackDbFile.write(htmlMakoRendered) 77 trackDbFile.write(htmlMakoRendered)
60 78
61 def terminate(self): 79 def terminate(self):
62 # Just a test to output a simple HTML 80 # Just a test to output a simple HTML
81 # TODO: Create a class to handle the file object
82 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')],
83 output_encoding='utf-8', encoding_errors='replace')
84
85 mytemplate = mylookup.get_template('display.txt')
63 with open(self.outputFile, 'w') as htmlOutput: 86 with open(self.outputFile, 'w') as htmlOutput:
64 htmlOutput.write('<html>') 87 # TODO: We are basically looping two times: One time with os.walk, Second time
65 htmlOutput.write('<body>') 88 # with the template. We could improve that if the number of files begins to be really important
65 htmlOutput.write('<body>') 88 # with the template. We could improve that if the number of files becomes very large
67 htmlOutput.write('The following has been generated by Hub Archive Creator:')
68 htmlOutput.write('</p>')
69 htmlOutput.write('<ul>')
70 for root, dirs, files in os.walk(self.extra_files_path): 90 for root, dirs, files in os.walk(self.extra_files_path):
71 for file in files: 91 for file in files:
72 relDir = os.path.relpath(root, self.extra_files_path) 92 relative_directory = os.path.relpath(root, self.extra_files_path)
73 htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), 93 relative_file_path = os.path.join(relative_directory, file)
74 os.path.join(relDir, file))) 94 list_relative_file_path.append(relative_file_path)
75 htmlOutput.write('<ul>') 95
76 htmlOutput.write('</body>') 96 htmlMakoRendered = mytemplate.render(
77 htmlOutput.write('</html>') 97 list_relative_file_path=list_relative_file_path
78 98 )
79 def __createAssemblyHub__(self, toolDirectory, extra_files_path): 99 htmlOutput.write(htmlMakoRendered)
100
101 def __createAssemblyHub__(self, extra_files_path):
102 # Get all the necessary information first
103 # 2bit file creation from input fasta
104
105 # baseNameFasta = os.path.basename(fasta_file_name)
106 # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
107 # nameTwoBit = suffixTwoBit + '.2bit'
108 twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
109 subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)
110
111 # Generate the twoBitInfo
112 twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
113 subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)
114
115 # Then we get the output to generate the chromSizes
116 self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
117 subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
118
119 # We can get the biggest scaffold here, with chromSizesFile
120 with open(self.chromSizesFile.name, 'r') as chrom_sizes:
121 # TODO: Check if exists
122 self.default_pos = chrom_sizes.readline().split()[0]
123
80 # TODO: Move every fill function into a dedicated file for readability 124 # TODO: Move every fill function into a dedicated file for readability
81 # Create the root directory 125 # Create the root directory
82 myHubPath = os.path.join(extra_files_path, "myHub") 126 myHubPath = os.path.join(extra_files_path, "myHub")
83 if not os.path.exists(myHubPath): 127 if not os.path.exists(myHubPath):
84 os.makedirs(myHubPath) 128 os.makedirs(myHubPath)
85 129
130 # Create the specie folder
131 # TODO: Generate the name depending on the specie
132 mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
133 if not os.path.exists(mySpecieFolderPath):
134 os.makedirs(mySpecieFolderPath)
135 self.mySpecieFolderPath = mySpecieFolderPath
136
137 # Copy the 2bit file into the specie folder now that the folder exists
138 self.twoBitName = self.genome_name + ".2bit"
139 self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
140 shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
141
86 # Add the genomes.txt file 142 # Add the genomes.txt file
87 genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') 143 genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt')
88 self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory) 144 self.__fillGenomesTxt__(genomesTxtFilePath)
89 145
90 # Add the hub.txt file 146 # Add the hub.txt file
91 hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') 147 hubTxtFilePath = os.path.join(myHubPath, 'hub.txt')
92 self.__fillHubTxt__(hubTxtFilePath, toolDirectory) 148 self.__fillHubTxt__(hubTxtFilePath)
93 149
94 # Add the hub.html file 150 # Add the hub.html file
95 # TODO: Change the name and get it depending on the specie 151 # TODO: Change the name and get it depending on the specie
96 hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') 152 hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html')
97 self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory) 153 self.__fillHubHtmlFile__(hubHtmlFilePath)
98 154
99 # Create the specie folder
100 # TODO: Generate the name depending on the specie
101 mySpecieFolderPath = os.path.join(myHubPath, "dbia3")
102 if not os.path.exists(mySpecieFolderPath):
103 os.makedirs(mySpecieFolderPath)
104 self.mySpecieFolderPath = mySpecieFolderPath
105 155
106 # Create the description html file in the specie folder 156 # Create the description html file in the specie folder
107 descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') 157 descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html')
108 self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory) 158 self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath)
109 159
110 # Create the file groups.txt 160 # Create the file groups.txt
111 # TODO: If there are no inputs for this, do not create the file 161 # TODO: If there are no inputs for this, do not create the file
112 groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') 162 groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
113 self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory) 163 self.__fillGroupsTxtFile__(groupsTxtFilePath)
114 164
115 # Create the folder tracks into the specie folder 165 # Create the folder tracks into the specie folder
116 tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") 166 tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
117 if not os.path.exists(tracksFolderPath): 167 if not os.path.exists(tracksFolderPath):
118 os.makedirs(tracksFolderPath) 168 os.makedirs(tracksFolderPath)
169 self.myTracksFolderPath = tracksFolderPath
119 170
120 return myHubPath 171 return myHubPath
121 172
122 def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory): 173 def __fillGenomesTxt__(self, genomesTxtFilePath):
123 # TODO: Think about the inputs and outputs 174 # TODO: Think about the inputs and outputs
124 # TODO: Manage the template of this file 175 # TODO: Manage the template of this file
125 # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") 176 # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly")
126 pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly') 177 pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly')
127 mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') 178 mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
128 mytemplate = mylookup.get_template("layout.txt") 179 mytemplate = mylookup.get_template("layout.txt")
129 with open(genomesTxtFilePath, 'w') as genomesTxtFile: 180 with open(genomesTxtFilePath, 'w') as genomesTxtFile:
130 # Write the content of the file genomes.txt 181 # Write the content of the file genomes.txt
131 twoBitPath = os.path.join('dbia3/', self.twoBitName) 182 twoBitPath = os.path.join(self.genome_name, self.twoBitName)
132 htmlMakoRendered = mytemplate.render( 183 htmlMakoRendered = mytemplate.render(
133 genomeName="dbia3", 184 genomeName=self.genome_name,
134 trackDbPath="dbia3/trackDb.txt", 185 trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
135 groupsPath="dbia3/groups.txt", 186 groupsPath=os.path.join(self.genome_name, "groups.txt"),
136 genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold", 187 genomeDescription=self.genome_name,
137 twoBitPath=twoBitPath, 188 twoBitPath=twoBitPath,
138 organismName="Drosophilia biarmipes", 189 organismName=self.genome_name,
139 defaultPosition="contig1", 190 defaultPosition=self.default_pos,
140 orderKey="4500", 191 orderKey="4500",
141 scientificName="Drosophilia biarmipes", 192 scientificName=self.genome_name,
142 pathAssemblyHtmlDescription="dbia3/description.html" 193 pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
143 ) 194 )
144 genomesTxtFile.write(htmlMakoRendered) 195 genomesTxtFile.write(htmlMakoRendered)
145 196
146 def __fillHubTxt__(self, hubTxtFilePath, toolDirectory): 197 def __fillHubTxt__(self, hubTxtFilePath):
147 # TODO: Think about the inputs and outputs 198 # TODO: Think about the inputs and outputs
148 # TODO: Manage the template of this file 199 # TODO: Manage the template of this file
149 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], 200 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')],
150 output_encoding='utf-8', encoding_errors='replace') 201 output_encoding='utf-8', encoding_errors='replace')
151 mytemplate = mylookup.get_template('layout.txt') 202 mytemplate = mylookup.get_template('layout.txt')
152 with open(hubTxtFilePath, 'w') as genomesTxtFile: 203 with open(hubTxtFilePath, 'w') as genomesTxtFile:
153 # Write the content of the file hub.txt 204 # Write the content of the file hub.txt
154 htmlMakoRendered = mytemplate.render( 205 htmlMakoRendered = mytemplate.render(
155 hubName='dbiaOnly', 206 hubName=(''.join(['gonramp', self.genome_name.title()])),
156 shortLabel='dbia', 207 shortLabel=self.genome_name,
157 longLabel='This hub only contains dbia with the gene predictions', 208 longLabel=self.genome_name,
158 genomesFile='genomes.txt', 209 genomesFile='genomes.txt',
159 email='rmarenco@gwu.edu', 210 email=self.user_email,
160 descriptionUrl='dbia.html' 211 descriptionUrl='dbia.html'
161 ) 212 )
162 genomesTxtFile.write(htmlMakoRendered) 213 genomesTxtFile.write(htmlMakoRendered)
163 214
164 def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory): 215 def __fillHubHtmlFile__(self, hubHtmlFilePath):
165 # TODO: Think about the inputs and outputs 216 # TODO: Think about the inputs and outputs
166 # TODO: Manage the template of this file 217 # TODO: Manage the template of this file
167 # renderer = pystache.Renderer(search_dirs="templates/hubDescription") 218 # renderer = pystache.Renderer(search_dirs="templates/hubDescription")
168 # t = Template(templates.hubDescription.layout.html) 219 # t = Template(templates.hubDescription.layout.html)
169 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], 220 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')],
170 output_encoding='utf-8', encoding_errors='replace') 221 output_encoding='utf-8', encoding_errors='replace')
171 mytemplate = mylookup.get_template("layout.txt") 222 mytemplate = mylookup.get_template("layout.txt")
172 with open(hubHtmlFilePath, 'w') as hubHtmlFile: 223 with open(hubHtmlFilePath, 'w') as hubHtmlFile:
173 # Write the content of the file genomes.txt
174 # htmlPystached = renderer.render_name(
175 # "layout",
176 # {'specie': 'Dbia',
177 # 'toolUsed': 'Augustus',
178 # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
179 # 'genomeID': '3499',
180 # 'SpecieFullName': 'Drosophila biarmipes'})
181 htmlMakoRendered = mytemplate.render( 224 htmlMakoRendered = mytemplate.render(
182 specie='Dbia', 225 specie='Dbia',
183 toolUsed='Augustus', 226 toolUsed='Augustus',
184 ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499', 227 ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499',
185 genomeID='3499', 228 genomeID='3499',
186 specieFullName='Drosophila biarmipes' 229 specieFullName='Drosophila biarmipes'
187 ) 230 )
188 # hubHtmlFile.write(htmlPystached) 231 #hubHtmlFile.write(htmlMakoRendered)
189 hubHtmlFile.write(htmlMakoRendered) 232
190 233 def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
191 def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory): 234 # TODO: Think about the inputs and outputs
192 # TODO: Think about the inputs and outputs 235 # TODO: Manage the template of this file
193 # TODO: Manage the template of this file 236 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')],
194 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')],
195 output_encoding='utf-8', encoding_errors='replace') 237 output_encoding='utf-8', encoding_errors='replace')
196 mytemplate = mylookup.get_template("layout.txt") 238 mytemplate = mylookup.get_template("layout.txt")
197 with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: 239 with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
198 # Write the content of the file description.html 240 # Write the content of the file description.html
199 htmlMakoRendered = mytemplate.render( 241 htmlMakoRendered = mytemplate.render(
200 specieDescription='This is the description of the dbia', 242 specieDescription='This is the description of the dbia',
201 ) 243 )
202 descriptionHtmlFile.write(htmlMakoRendered) 244 #descriptionHtmlFile.write(htmlMakoRendered)
203 245
204 def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory): 246 def __fillGroupsTxtFile__(self, groupsTxtFilePath):
205 # TODO: Reenable this function at some point 247 # TODO: Reenable this function at some point
206 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], 248 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')],
207 output_encoding='utf-8', encoding_errors='replace') 249 output_encoding='utf-8', encoding_errors='replace')
208 mytemplate = mylookup.get_template("layout.txt") 250 mytemplate = mylookup.get_template("layout.txt")
209 with open(groupsTxtFilePath, 'w') as groupsTxtFile: 251 with open(groupsTxtFilePath, 'w') as groupsTxtFile:
210 # Write the content of groups.txt 252 # Write the content of groups.txt
211 # groupsTxtFile.write('name map') 253 # groupsTxtFile.write('name map')