Mercurial > repos > rmarenco > hubarchivecreator
comparison TrackHub.py @ 10:acc233161f50 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
author | rmarenco |
---|---|
date | Thu, 21 Jul 2016 05:58:51 -0400 |
parents | 4f9847539a28 |
children | d05236b15f81 |
comparison
equal
deleted
inserted
replaced
9:4f9847539a28 | 10:acc233161f50 |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # -*- coding: utf8 -*- | 2 # -*- coding: utf8 -*- |
3 | 3 |
4 import os | 4 import os |
5 import tempfile | |
6 import shutil | |
5 import zipfile | 7 import zipfile |
8 | |
9 # Internal dependencies | |
10 from Datatype import Datatype | |
11 from util import subtools | |
6 | 12 |
7 from mako.lookup import TemplateLookup | 13 from mako.lookup import TemplateLookup |
8 | 14 |
9 | 15 |
10 class TrackHub(object): | 16 class TrackHub(object): |
11 """docstring for TrackHub""" | 17 """docstring for TrackHub""" |
12 | 18 |
13 def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory): | 19 def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory): |
14 super(TrackHub, self).__init__() | 20 super(TrackHub, self).__init__() |
15 | 21 |
16 self.rootAssemblyHub = None | 22 self.rootAssemblyHub = None |
23 | |
17 self.mySpecieFolderPath = None | 24 self.mySpecieFolderPath = None |
25 self.myTracksFolderPath = None | |
18 self.tool_directory = tool_directory | 26 self.tool_directory = tool_directory |
27 | |
28 self.reference_genome = inputFastaFile | |
29 # TODO: Add the specie name | |
30 self.genome_name = inputFastaFile.assembly_id | |
31 self.default_pos = None | |
32 self.user_email = user_email | |
19 | 33 |
20 # TODO: Modify according to the files passed in parameter | 34 # TODO: Modify according to the files passed in parameter |
21 mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], | 35 mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], |
22 output_encoding='utf-8', encoding_errors='replace') | 36 output_encoding='utf-8', encoding_errors='replace') |
23 self.trackDbTemplate = mylookup.get_template("layout.txt") | 37 self.trackDbTemplate = mylookup.get_template("layout.txt") |
24 | 38 |
25 self.extra_files_path = extra_files_path | 39 self.extra_files_path = extra_files_path |
26 self.outputFile = outputFile | 40 self.outputFile = outputFile |
27 | 41 |
28 inputFastaFile = open(inputFastaFile, 'r') | |
29 #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True) | |
30 | |
31 # Create the structure of the Assembly Hub | 42 # Create the structure of the Assembly Hub |
32 # TODO: Merge the following processing into a function as it is also used in twoBitCreator | 43 # TODO: Merge the following processing into a function as it is also used in twoBitCreator |
33 baseNameFasta = os.path.basename(inputFastaFile.name) | 44 self.twoBitName = None |
34 suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) | 45 self.two_bit_final_path = None |
35 self.twoBitName = suffixTwoBit + '.2bit' | 46 self.chromSizesFile = None |
36 | 47 |
37 self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory, | 48 self.default_pos = None |
38 extra_files_path=extra_files_path) | 49 |
50 # Set all the missing variables of this class, and create physically the folders/files | |
51 self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path) | |
52 | |
53 # Init the Datatype | |
54 Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile, | |
55 self.extra_files_path, self.tool_directory, | |
56 self.mySpecieFolderPath, self.myTracksFolderPath) | |
39 | 57 |
40 def createZip(self): | 58 def createZip(self): |
41 for root, dirs, files in os.walk(self.rootAssemblyHub): | 59 for root, dirs, files in os.walk(self.rootAssemblyHub): |
42 # Get all files and construct the dir at the same time | 60 # Get all files and construct the dir at the same time |
43 for file in files: | 61 for file in files: |
58 ) | 76 ) |
59 trackDbFile.write(htmlMakoRendered) | 77 trackDbFile.write(htmlMakoRendered) |
60 | 78 |
61 def terminate(self): | 79 def terminate(self): |
62 # Just a test to output a simple HTML | 80 # Just a test to output a simple HTML |
81 # TODO: Create a class to handle the file object | |
82 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')], | |
83 output_encoding='utf-8', encoding_errors='replace') | |
84 | |
85 mytemplate = mylookup.get_template('display.txt') | |
63 with open(self.outputFile, 'w') as htmlOutput: | 86 with open(self.outputFile, 'w') as htmlOutput: |
64 htmlOutput.write('<html>') | 87 # TODO: We are basically looping two times: One time with os.walk, Second time |
65 htmlOutput.write('<body>') | 88 # with the template. We could improve that if the number of files begins to be really important |
66 htmlOutput.write('<p>') | 89 list_relative_file_path = [ ] |
67 htmlOutput.write('The following has been generated by Hub Archive Creator:') | |
68 htmlOutput.write('</p>') | |
69 htmlOutput.write('<ul>') | |
70 for root, dirs, files in os.walk(self.extra_files_path): | 90 for root, dirs, files in os.walk(self.extra_files_path): |
71 for file in files: | 91 for file in files: |
72 relDir = os.path.relpath(root, self.extra_files_path) | 92 relative_directory = os.path.relpath(root, self.extra_files_path) |
73 htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), | 93 relative_file_path = os.path.join(relative_directory, file) |
74 os.path.join(relDir, file))) | 94 list_relative_file_path.append(relative_file_path) |
75 htmlOutput.write('<ul>') | 95 |
76 htmlOutput.write('</body>') | 96 htmlMakoRendered = mytemplate.render( |
77 htmlOutput.write('</html>') | 97 list_relative_file_path=list_relative_file_path |
78 | 98 ) |
79 def __createAssemblyHub__(self, toolDirectory, extra_files_path): | 99 htmlOutput.write(htmlMakoRendered) |
100 | |
101 def __createAssemblyHub__(self, extra_files_path): | |
102 # Get all the necessary information first | |
103 # 2bit file creation from input fasta | |
104 | |
105 # baseNameFasta = os.path.basename(fasta_file_name) | |
106 # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) | |
107 # nameTwoBit = suffixTwoBit + '.2bit' | |
108 twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) | |
109 subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name) | |
110 | |
111 # Generate the twoBitInfo | |
112 twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) | |
113 subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name) | |
114 | |
115 # Then we get the output to generate the chromSizes | |
116 self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") | |
117 subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name) | |
118 | |
119 # We can get the biggest scaffold here, with chromSizesFile | |
120 with open(self.chromSizesFile.name, 'r') as chrom_sizes: | |
121 # TODO: Check if exists | |
122 self.default_pos = chrom_sizes.readline().split()[0] | |
123 | |
80 # TODO: Move every fill function into a dedicated file for readability | 124 # TODO: Move every fill function into a dedicated file for readability |
81 # Create the root directory | 125 # Create the root directory |
82 myHubPath = os.path.join(extra_files_path, "myHub") | 126 myHubPath = os.path.join(extra_files_path, "myHub") |
83 if not os.path.exists(myHubPath): | 127 if not os.path.exists(myHubPath): |
84 os.makedirs(myHubPath) | 128 os.makedirs(myHubPath) |
85 | 129 |
130 # Create the specie folder | |
131 # TODO: Generate the name depending on the specie | |
132 mySpecieFolderPath = os.path.join(myHubPath, self.genome_name) | |
133 if not os.path.exists(mySpecieFolderPath): | |
134 os.makedirs(mySpecieFolderPath) | |
135 self.mySpecieFolderPath = mySpecieFolderPath | |
136 | |
137 # We create the 2bit file while we just created the specie folder | |
138 self.twoBitName = self.genome_name + ".2bit" | |
139 self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName) | |
140 shutil.copyfile(twoBitFile.name, self.two_bit_final_path) | |
141 | |
86 # Add the genomes.txt file | 142 # Add the genomes.txt file |
87 genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') | 143 genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') |
88 self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory) | 144 self.__fillGenomesTxt__(genomesTxtFilePath) |
89 | 145 |
90 # Add the hub.txt file | 146 # Add the hub.txt file |
91 hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') | 147 hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') |
92 self.__fillHubTxt__(hubTxtFilePath, toolDirectory) | 148 self.__fillHubTxt__(hubTxtFilePath) |
93 | 149 |
94 # Add the hub.html file | 150 # Add the hub.html file |
95 # TODO: Change the name and get it depending on the specie | 151 # TODO: Change the name and get it depending on the specie |
96 hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') | 152 hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') |
97 self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory) | 153 self.__fillHubHtmlFile__(hubHtmlFilePath) |
98 | 154 |
99 # Create the specie folder | |
100 # TODO: Generate the name depending on the specie | |
101 mySpecieFolderPath = os.path.join(myHubPath, "dbia3") | |
102 if not os.path.exists(mySpecieFolderPath): | |
103 os.makedirs(mySpecieFolderPath) | |
104 self.mySpecieFolderPath = mySpecieFolderPath | |
105 | 155 |
106 # Create the description html file in the specie folder | 156 # Create the description html file in the specie folder |
107 descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') | 157 descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') |
108 self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory) | 158 self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath) |
109 | 159 |
110 # Create the file groups.txt | 160 # Create the file groups.txt |
111 # TODO: If there are no inputs for this, do not create the file | 161 # TODO: If there are no inputs for this, do not create the file |
112 groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') | 162 groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') |
113 self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory) | 163 self.__fillGroupsTxtFile__(groupsTxtFilePath) |
114 | 164 |
115 # Create the folder tracks into the specie folder | 165 # Create the folder tracks into the specie folder |
116 tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") | 166 tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") |
117 if not os.path.exists(tracksFolderPath): | 167 if not os.path.exists(tracksFolderPath): |
118 os.makedirs(tracksFolderPath) | 168 os.makedirs(tracksFolderPath) |
169 self.myTracksFolderPath = tracksFolderPath | |
119 | 170 |
120 return myHubPath | 171 return myHubPath |
121 | 172 |
122 def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory): | 173 def __fillGenomesTxt__(self, genomesTxtFilePath): |
123 # TODO: Think about the inputs and outputs | 174 # TODO: Think about the inputs and outputs |
124 # TODO: Manage the template of this file | 175 # TODO: Manage the template of this file |
125 # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") | 176 # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") |
126 pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly') | 177 pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly') |
127 mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') | 178 mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') |
128 mytemplate = mylookup.get_template("layout.txt") | 179 mytemplate = mylookup.get_template("layout.txt") |
129 with open(genomesTxtFilePath, 'w') as genomesTxtFile: | 180 with open(genomesTxtFilePath, 'w') as genomesTxtFile: |
130 # Write the content of the file genomes.txt | 181 # Write the content of the file genomes.txt |
131 twoBitPath = os.path.join('dbia3/', self.twoBitName) | 182 twoBitPath = os.path.join(self.genome_name, self.twoBitName) |
132 htmlMakoRendered = mytemplate.render( | 183 htmlMakoRendered = mytemplate.render( |
133 genomeName="dbia3", | 184 genomeName=self.genome_name, |
134 trackDbPath="dbia3/trackDb.txt", | 185 trackDbPath=os.path.join(self.genome_name, "trackDb.txt"), |
135 groupsPath="dbia3/groups.txt", | 186 groupsPath=os.path.join(self.genome_name, "groups.txt"), |
136 genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold", | 187 genomeDescription=self.genome_name, |
137 twoBitPath=twoBitPath, | 188 twoBitPath=twoBitPath, |
138 organismName="Drosophilia biarmipes", | 189 organismName=self.genome_name, |
139 defaultPosition="contig1", | 190 defaultPosition=self.default_pos, |
140 orderKey="4500", | 191 orderKey="4500", |
141 scientificName="Drosophilia biarmipes", | 192 scientificName=self.genome_name, |
142 pathAssemblyHtmlDescription="dbia3/description.html" | 193 pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html") |
143 ) | 194 ) |
144 genomesTxtFile.write(htmlMakoRendered) | 195 genomesTxtFile.write(htmlMakoRendered) |
145 | 196 |
146 def __fillHubTxt__(self, hubTxtFilePath, toolDirectory): | 197 def __fillHubTxt__(self, hubTxtFilePath): |
147 # TODO: Think about the inputs and outputs | 198 # TODO: Think about the inputs and outputs |
148 # TODO: Manage the template of this file | 199 # TODO: Manage the template of this file |
149 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], | 200 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')], |
150 output_encoding='utf-8', encoding_errors='replace') | 201 output_encoding='utf-8', encoding_errors='replace') |
151 mytemplate = mylookup.get_template('layout.txt') | 202 mytemplate = mylookup.get_template('layout.txt') |
152 with open(hubTxtFilePath, 'w') as genomesTxtFile: | 203 with open(hubTxtFilePath, 'w') as genomesTxtFile: |
153 # Write the content of the file hub.txt | 204 # Write the content of the file hub.txt |
154 htmlMakoRendered = mytemplate.render( | 205 htmlMakoRendered = mytemplate.render( |
155 hubName='dbiaOnly', | 206 hubName=(''.join(['gonramp', self.genome_name.title()])), |
156 shortLabel='dbia', | 207 shortLabel=self.genome_name, |
157 longLabel='This hub only contains dbia with the gene predictions', | 208 longLabel=self.genome_name, |
158 genomesFile='genomes.txt', | 209 genomesFile='genomes.txt', |
159 email='rmarenco@gwu.edu', | 210 email=self.user_email, |
160 descriptionUrl='dbia.html' | 211 descriptionUrl='dbia.html' |
161 ) | 212 ) |
162 genomesTxtFile.write(htmlMakoRendered) | 213 genomesTxtFile.write(htmlMakoRendered) |
163 | 214 |
164 def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory): | 215 def __fillHubHtmlFile__(self, hubHtmlFilePath): |
165 # TODO: Think about the inputs and outputs | 216 # TODO: Think about the inputs and outputs |
166 # TODO: Manage the template of this file | 217 # TODO: Manage the template of this file |
167 # renderer = pystache.Renderer(search_dirs="templates/hubDescription") | 218 # renderer = pystache.Renderer(search_dirs="templates/hubDescription") |
168 # t = Template(templates.hubDescription.layout.html) | 219 # t = Template(templates.hubDescription.layout.html) |
169 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], | 220 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')], |
170 output_encoding='utf-8', encoding_errors='replace') | 221 output_encoding='utf-8', encoding_errors='replace') |
171 mytemplate = mylookup.get_template("layout.txt") | 222 mytemplate = mylookup.get_template("layout.txt") |
172 with open(hubHtmlFilePath, 'w') as hubHtmlFile: | 223 with open(hubHtmlFilePath, 'w') as hubHtmlFile: |
173 # Write the content of the file genomes.txt | |
174 # htmlPystached = renderer.render_name( | |
175 # "layout", | |
176 # {'specie': 'Dbia', | |
177 # 'toolUsed': 'Augustus', | |
178 # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499', | |
179 # 'genomeID': '3499', | |
180 # 'SpecieFullName': 'Drosophila biarmipes'}) | |
181 htmlMakoRendered = mytemplate.render( | 224 htmlMakoRendered = mytemplate.render( |
182 specie='Dbia', | 225 specie='Dbia', |
183 toolUsed='Augustus', | 226 toolUsed='Augustus', |
184 ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499', | 227 ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499', |
185 genomeID='3499', | 228 genomeID='3499', |
186 specieFullName='Drosophila biarmipes' | 229 specieFullName='Drosophila biarmipes' |
187 ) | 230 ) |
188 # hubHtmlFile.write(htmlPystached) | 231 #hubHtmlFile.write(htmlMakoRendered) |
189 hubHtmlFile.write(htmlMakoRendered) | 232 |
190 | 233 def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath): |
191 def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory): | 234 # TODO: Think about the inputs and outputs |
192 # TODO: Think about the inputs and outputs | 235 # TODO: Manage the template of this file |
193 # TODO: Manage the template of this file | 236 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')], |
194 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')], | |
195 output_encoding='utf-8', encoding_errors='replace') | 237 output_encoding='utf-8', encoding_errors='replace') |
196 mytemplate = mylookup.get_template("layout.txt") | 238 mytemplate = mylookup.get_template("layout.txt") |
197 with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: | 239 with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: |
198 # Write the content of the file description.html | 240 # Write the content of the file description.html |
199 htmlMakoRendered = mytemplate.render( | 241 htmlMakoRendered = mytemplate.render( |
200 specieDescription='This is the description of the dbia', | 242 specieDescription='This is the description of the dbia', |
201 ) | 243 ) |
202 descriptionHtmlFile.write(htmlMakoRendered) | 244 #descriptionHtmlFile.write(htmlMakoRendered) |
203 | 245 |
204 def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory): | 246 def __fillGroupsTxtFile__(self, groupsTxtFilePath): |
205 # TODO: Reenable this function at some point | 247 # TODO: Reenable this function at some point |
206 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], | 248 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')], |
207 output_encoding='utf-8', encoding_errors='replace') | 249 output_encoding='utf-8', encoding_errors='replace') |
208 mytemplate = mylookup.get_template("layout.txt") | 250 mytemplate = mylookup.get_template("layout.txt") |
209 with open(groupsTxtFilePath, 'w') as groupsTxtFile: | 251 with open(groupsTxtFilePath, 'w') as groupsTxtFile: |
210 # Write the content of groups.txt | 252 # Write the content of groups.txt |
211 # groupsTxtFile.write('name map') | 253 # groupsTxtFile.write('name map') |