Mercurial > repos > rmarenco > hubarchivecreator
comparison TrackHub.py @ 10:acc233161f50 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
| author | rmarenco | 
|---|---|
| date | Thu, 21 Jul 2016 05:58:51 -0400 | 
| parents | 4f9847539a28 | 
| children | d05236b15f81 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 9:4f9847539a28 | 10:acc233161f50 | 
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python | 
| 2 # -*- coding: utf8 -*- | 2 # -*- coding: utf8 -*- | 
| 3 | 3 | 
| 4 import os | 4 import os | 
| 5 import tempfile | |
| 6 import shutil | |
| 5 import zipfile | 7 import zipfile | 
| 8 | |
| 9 # Internal dependencies | |
| 10 from Datatype import Datatype | |
| 11 from util import subtools | |
| 6 | 12 | 
| 7 from mako.lookup import TemplateLookup | 13 from mako.lookup import TemplateLookup | 
| 8 | 14 | 
| 9 | 15 | 
| 10 class TrackHub(object): | 16 class TrackHub(object): | 
| 11 """docstring for TrackHub""" | 17 """docstring for TrackHub""" | 
| 12 | 18 | 
| 13 def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory): | 19 def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory): | 
| 14 super(TrackHub, self).__init__() | 20 super(TrackHub, self).__init__() | 
| 15 | 21 | 
| 16 self.rootAssemblyHub = None | 22 self.rootAssemblyHub = None | 
| 23 | |
| 17 self.mySpecieFolderPath = None | 24 self.mySpecieFolderPath = None | 
| 25 self.myTracksFolderPath = None | |
| 18 self.tool_directory = tool_directory | 26 self.tool_directory = tool_directory | 
| 27 | |
| 28 self.reference_genome = inputFastaFile | |
| 29 # TODO: Add the specie name | |
| 30 self.genome_name = inputFastaFile.assembly_id | |
| 31 self.default_pos = None | |
| 32 self.user_email = user_email | |
| 19 | 33 | 
| 20 # TODO: Modify according to the files passed in parameter | 34 # TODO: Modify according to the files passed in parameter | 
| 21 mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], | 35 mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')], | 
| 22 output_encoding='utf-8', encoding_errors='replace') | 36 output_encoding='utf-8', encoding_errors='replace') | 
| 23 self.trackDbTemplate = mylookup.get_template("layout.txt") | 37 self.trackDbTemplate = mylookup.get_template("layout.txt") | 
| 24 | 38 | 
| 25 self.extra_files_path = extra_files_path | 39 self.extra_files_path = extra_files_path | 
| 26 self.outputFile = outputFile | 40 self.outputFile = outputFile | 
| 27 | 41 | 
| 28 inputFastaFile = open(inputFastaFile, 'r') | |
| 29 #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True) | |
| 30 | |
| 31 # Create the structure of the Assembly Hub | 42 # Create the structure of the Assembly Hub | 
| 32 # TODO: Merge the following processing into a function as it is also used in twoBitCreator | 43 # TODO: Merge the following processing into a function as it is also used in twoBitCreator | 
| 33 baseNameFasta = os.path.basename(inputFastaFile.name) | 44 self.twoBitName = None | 
| 34 suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) | 45 self.two_bit_final_path = None | 
| 35 self.twoBitName = suffixTwoBit + '.2bit' | 46 self.chromSizesFile = None | 
| 36 | 47 | 
| 37 self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory, | 48 self.default_pos = None | 
| 38 extra_files_path=extra_files_path) | 49 | 
| 50 # Set all the missing variables of this class, and create physically the folders/files | |
| 51 self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path) | |
| 52 | |
| 53 # Init the Datatype | |
| 54 Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile, | |
| 55 self.extra_files_path, self.tool_directory, | |
| 56 self.mySpecieFolderPath, self.myTracksFolderPath) | |
| 39 | 57 | 
| 40 def createZip(self): | 58 def createZip(self): | 
| 41 for root, dirs, files in os.walk(self.rootAssemblyHub): | 59 for root, dirs, files in os.walk(self.rootAssemblyHub): | 
| 42 # Get all files and construct the dir at the same time | 60 # Get all files and construct the dir at the same time | 
| 43 for file in files: | 61 for file in files: | 
| 58 ) | 76 ) | 
| 59 trackDbFile.write(htmlMakoRendered) | 77 trackDbFile.write(htmlMakoRendered) | 
| 60 | 78 | 
| 61 def terminate(self): | 79 def terminate(self): | 
| 62 # Just a test to output a simple HTML | 80 # Just a test to output a simple HTML | 
| 81 # TODO: Create a class to handle the file object | |
| 82 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')], | |
| 83 output_encoding='utf-8', encoding_errors='replace') | |
| 84 | |
| 85 mytemplate = mylookup.get_template('display.txt') | |
| 63 with open(self.outputFile, 'w') as htmlOutput: | 86 with open(self.outputFile, 'w') as htmlOutput: | 
| 64 htmlOutput.write('<html>') | 87 # TODO: We are basically looping two times: One time with os.walk, Second time | 
| 65 htmlOutput.write('<body>') | 88 # with the template. We could improve that if the number of files begins to be really important | 
| 66 htmlOutput.write('<p>') | 89 list_relative_file_path = [ ] | 
| 67 htmlOutput.write('The following has been generated by Hub Archive Creator:') | |
| 68 htmlOutput.write('</p>') | |
| 69 htmlOutput.write('<ul>') | |
| 70 for root, dirs, files in os.walk(self.extra_files_path): | 90 for root, dirs, files in os.walk(self.extra_files_path): | 
| 71 for file in files: | 91 for file in files: | 
| 72 relDir = os.path.relpath(root, self.extra_files_path) | 92 relative_directory = os.path.relpath(root, self.extra_files_path) | 
| 73 htmlOutput.write(str.format('<li><a href="{0}">{1}</a></li>', os.path.join(relDir, file), | 93 relative_file_path = os.path.join(relative_directory, file) | 
| 74 os.path.join(relDir, file))) | 94 list_relative_file_path.append(relative_file_path) | 
| 75 htmlOutput.write('<ul>') | 95 | 
| 76 htmlOutput.write('</body>') | 96 htmlMakoRendered = mytemplate.render( | 
| 77 htmlOutput.write('</html>') | 97 list_relative_file_path=list_relative_file_path | 
| 78 | 98 ) | 
| 79 def __createAssemblyHub__(self, toolDirectory, extra_files_path): | 99 htmlOutput.write(htmlMakoRendered) | 
| 100 | |
| 101 def __createAssemblyHub__(self, extra_files_path): | |
| 102 # Get all necessaries infos first | |
| 103 # 2bit file creation from input fasta | |
| 104 | |
| 105 # baseNameFasta = os.path.basename(fasta_file_name) | |
| 106 # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) | |
| 107 # nameTwoBit = suffixTwoBit + '.2bit' | |
| 108 twoBitFile = tempfile.NamedTemporaryFile(bufsize=0) | |
| 109 subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name) | |
| 110 | |
| 111 # Generate the twoBitInfo | |
| 112 twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) | |
| 113 subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name) | |
| 114 | |
| 115 # Then we get the output to generate the chromSizes | |
| 116 self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") | |
| 117 subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name) | |
| 118 | |
| 119 # We can get the biggest scaffold here, with chromSizesFile | |
| 120 with open(self.chromSizesFile.name, 'r') as chrom_sizes: | |
| 121 # TODO: Check if exists | |
| 122 self.default_pos = chrom_sizes.readline().split()[0] | |
| 123 | |
| 80 # TODO: Manage to put every fill Function in a file dedicated for reading reasons | 124 # TODO: Manage to put every fill Function in a file dedicated for reading reasons | 
| 81 # Create the root directory | 125 # Create the root directory | 
| 82 myHubPath = os.path.join(extra_files_path, "myHub") | 126 myHubPath = os.path.join(extra_files_path, "myHub") | 
| 83 if not os.path.exists(myHubPath): | 127 if not os.path.exists(myHubPath): | 
| 84 os.makedirs(myHubPath) | 128 os.makedirs(myHubPath) | 
| 85 | 129 | 
| 130 # Create the specie folder | |
| 131 # TODO: Generate the name depending on the specie | |
| 132 mySpecieFolderPath = os.path.join(myHubPath, self.genome_name) | |
| 133 if not os.path.exists(mySpecieFolderPath): | |
| 134 os.makedirs(mySpecieFolderPath) | |
| 135 self.mySpecieFolderPath = mySpecieFolderPath | |
| 136 | |
| 137 # We create the 2bit file while we just created the specie folder | |
| 138 self.twoBitName = self.genome_name + ".2bit" | |
| 139 self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName) | |
| 140 shutil.copyfile(twoBitFile.name, self.two_bit_final_path) | |
| 141 | |
| 86 # Add the genomes.txt file | 142 # Add the genomes.txt file | 
| 87 genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') | 143 genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt') | 
| 88 self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory) | 144 self.__fillGenomesTxt__(genomesTxtFilePath) | 
| 89 | 145 | 
| 90 # Add the hub.txt file | 146 # Add the hub.txt file | 
| 91 hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') | 147 hubTxtFilePath = os.path.join(myHubPath, 'hub.txt') | 
| 92 self.__fillHubTxt__(hubTxtFilePath, toolDirectory) | 148 self.__fillHubTxt__(hubTxtFilePath) | 
| 93 | 149 | 
| 94 # Add the hub.html file | 150 # Add the hub.html file | 
| 95 # TODO: Change the name and get it depending on the specie | 151 # TODO: Change the name and get it depending on the specie | 
| 96 hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') | 152 hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html') | 
| 97 self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory) | 153 self.__fillHubHtmlFile__(hubHtmlFilePath) | 
| 98 | 154 | 
| 99 # Create the specie folder | |
| 100 # TODO: Generate the name depending on the specie | |
| 101 mySpecieFolderPath = os.path.join(myHubPath, "dbia3") | |
| 102 if not os.path.exists(mySpecieFolderPath): | |
| 103 os.makedirs(mySpecieFolderPath) | |
| 104 self.mySpecieFolderPath = mySpecieFolderPath | |
| 105 | 155 | 
| 106 # Create the description html file in the specie folder | 156 # Create the description html file in the specie folder | 
| 107 descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') | 157 descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html') | 
| 108 self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory) | 158 self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath) | 
| 109 | 159 | 
| 110 # Create the file groups.txt | 160 # Create the file groups.txt | 
| 111 # TODO: If not inputs for this, do no create the file | 161 # TODO: If not inputs for this, do no create the file | 
| 112 groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') | 162 groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt') | 
| 113 self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory) | 163 self.__fillGroupsTxtFile__(groupsTxtFilePath) | 
| 114 | 164 | 
| 115 # Create the folder tracks into the specie folder | 165 # Create the folder tracks into the specie folder | 
| 116 tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") | 166 tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks") | 
| 117 if not os.path.exists(tracksFolderPath): | 167 if not os.path.exists(tracksFolderPath): | 
| 118 os.makedirs(tracksFolderPath) | 168 os.makedirs(tracksFolderPath) | 
| 169 self.myTracksFolderPath = tracksFolderPath | |
| 119 | 170 | 
| 120 return myHubPath | 171 return myHubPath | 
| 121 | 172 | 
| 122 def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory): | 173 def __fillGenomesTxt__(self, genomesTxtFilePath): | 
| 123 # TODO: Think about the inputs and outputs | 174 # TODO: Think about the inputs and outputs | 
| 124 # TODO: Manage the template of this file | 175 # TODO: Manage the template of this file | 
| 125 # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") | 176 # renderer = pystache.Renderer(search_dirs="templates/genomesAssembly") | 
| 126 pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly') | 177 pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly') | 
| 127 mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') | 178 mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace') | 
| 128 mytemplate = mylookup.get_template("layout.txt") | 179 mytemplate = mylookup.get_template("layout.txt") | 
| 129 with open(genomesTxtFilePath, 'w') as genomesTxtFile: | 180 with open(genomesTxtFilePath, 'w') as genomesTxtFile: | 
| 130 # Write the content of the file genomes.txt | 181 # Write the content of the file genomes.txt | 
| 131 twoBitPath = os.path.join('dbia3/', self.twoBitName) | 182 twoBitPath = os.path.join(self.genome_name, self.twoBitName) | 
| 132 htmlMakoRendered = mytemplate.render( | 183 htmlMakoRendered = mytemplate.render( | 
| 133 genomeName="dbia3", | 184 genomeName=self.genome_name, | 
| 134 trackDbPath="dbia3/trackDb.txt", | 185 trackDbPath=os.path.join(self.genome_name, "trackDb.txt"), | 
| 135 groupsPath="dbia3/groups.txt", | 186 groupsPath=os.path.join(self.genome_name, "groups.txt"), | 
| 136 genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold", | 187 genomeDescription=self.genome_name, | 
| 137 twoBitPath=twoBitPath, | 188 twoBitPath=twoBitPath, | 
| 138 organismName="Drosophilia biarmipes", | 189 organismName=self.genome_name, | 
| 139 defaultPosition="contig1", | 190 defaultPosition=self.default_pos, | 
| 140 orderKey="4500", | 191 orderKey="4500", | 
| 141 scientificName="Drosophilia biarmipes", | 192 scientificName=self.genome_name, | 
| 142 pathAssemblyHtmlDescription="dbia3/description.html" | 193 pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html") | 
| 143 ) | 194 ) | 
| 144 genomesTxtFile.write(htmlMakoRendered) | 195 genomesTxtFile.write(htmlMakoRendered) | 
| 145 | 196 | 
| 146 def __fillHubTxt__(self, hubTxtFilePath, toolDirectory): | 197 def __fillHubTxt__(self, hubTxtFilePath): | 
| 147 # TODO: Think about the inputs and outputs | 198 # TODO: Think about the inputs and outputs | 
| 148 # TODO: Manage the template of this file | 199 # TODO: Manage the template of this file | 
| 149 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')], | 200 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')], | 
| 150 output_encoding='utf-8', encoding_errors='replace') | 201 output_encoding='utf-8', encoding_errors='replace') | 
| 151 mytemplate = mylookup.get_template('layout.txt') | 202 mytemplate = mylookup.get_template('layout.txt') | 
| 152 with open(hubTxtFilePath, 'w') as genomesTxtFile: | 203 with open(hubTxtFilePath, 'w') as genomesTxtFile: | 
| 153 # Write the content of the file genomes.txt | 204 # Write the content of the file genomes.txt | 
| 154 htmlMakoRendered = mytemplate.render( | 205 htmlMakoRendered = mytemplate.render( | 
| 155 hubName='dbiaOnly', | 206 hubName=(''.join(['gonramp', self.genome_name.title()])), | 
| 156 shortLabel='dbia', | 207 shortLabel=self.genome_name, | 
| 157 longLabel='This hub only contains dbia with the gene predictions', | 208 longLabel=self.genome_name, | 
| 158 genomesFile='genomes.txt', | 209 genomesFile='genomes.txt', | 
| 159 email='rmarenco@gwu.edu', | 210 email=self.user_email, | 
| 160 descriptionUrl='dbia.html' | 211 descriptionUrl='dbia.html' | 
| 161 ) | 212 ) | 
| 162 genomesTxtFile.write(htmlMakoRendered) | 213 genomesTxtFile.write(htmlMakoRendered) | 
| 163 | 214 | 
| 164 def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory): | 215 def __fillHubHtmlFile__(self, hubHtmlFilePath): | 
| 165 # TODO: Think about the inputs and outputs | 216 # TODO: Think about the inputs and outputs | 
| 166 # TODO: Manage the template of this file | 217 # TODO: Manage the template of this file | 
| 167 # renderer = pystache.Renderer(search_dirs="templates/hubDescription") | 218 # renderer = pystache.Renderer(search_dirs="templates/hubDescription") | 
| 168 # t = Template(templates.hubDescription.layout.html) | 219 # t = Template(templates.hubDescription.layout.html) | 
| 169 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')], | 220 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')], | 
| 170 output_encoding='utf-8', encoding_errors='replace') | 221 output_encoding='utf-8', encoding_errors='replace') | 
| 171 mytemplate = mylookup.get_template("layout.txt") | 222 mytemplate = mylookup.get_template("layout.txt") | 
| 172 with open(hubHtmlFilePath, 'w') as hubHtmlFile: | 223 with open(hubHtmlFilePath, 'w') as hubHtmlFile: | 
| 173 # Write the content of the file genomes.txt | |
| 174 # htmlPystached = renderer.render_name( | |
| 175 # "layout", | |
| 176 # {'specie': 'Dbia', | |
| 177 # 'toolUsed': 'Augustus', | |
| 178 # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499', | |
| 179 # 'genomeID': '3499', | |
| 180 # 'SpecieFullName': 'Drosophila biarmipes'}) | |
| 181 htmlMakoRendered = mytemplate.render( | 224 htmlMakoRendered = mytemplate.render( | 
| 182 specie='Dbia', | 225 specie='Dbia', | 
| 183 toolUsed='Augustus', | 226 toolUsed='Augustus', | 
| 184 ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499', | 227 ncbiSpecieUrl='http://www.ncbi.nlm.nih.gov/genome/3499', | 
| 185 genomeID='3499', | 228 genomeID='3499', | 
| 186 specieFullName='Drosophila biarmipes' | 229 specieFullName='Drosophila biarmipes' | 
| 187 ) | 230 ) | 
| 188 # hubHtmlFile.write(htmlPystached) | 231 #hubHtmlFile.write(htmlMakoRendered) | 
| 189 hubHtmlFile.write(htmlMakoRendered) | 232 | 
| 190 | 233 def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath): | 
| 191 def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory): | 234 # TODO: Think about the inputs and outputs | 
| 192 # TODO: Think about the inputs and outputs | 235 # TODO: Manage the template of this file | 
| 193 # TODO: Manage the template of this file | 236 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')], | 
| 194 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')], | |
| 195 output_encoding='utf-8', encoding_errors='replace') | 237 output_encoding='utf-8', encoding_errors='replace') | 
| 196 mytemplate = mylookup.get_template("layout.txt") | 238 mytemplate = mylookup.get_template("layout.txt") | 
| 197 with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: | 239 with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile: | 
| 198 # Write the content of the file genomes.txt | 240 # Write the content of the file genomes.txt | 
| 199 htmlMakoRendered = mytemplate.render( | 241 htmlMakoRendered = mytemplate.render( | 
| 200 specieDescription='This is the description of the dbia', | 242 specieDescription='This is the description of the dbia', | 
| 201 ) | 243 ) | 
| 202 descriptionHtmlFile.write(htmlMakoRendered) | 244 #descriptionHtmlFile.write(htmlMakoRendered) | 
| 203 | 245 | 
| 204 def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory): | 246 def __fillGroupsTxtFile__(self, groupsTxtFilePath): | 
| 205 # TODO: Reenable this function at some point | 247 # TODO: Reenable this function at some point | 
| 206 mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')], | 248 mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')], | 
| 207 output_encoding='utf-8', encoding_errors='replace') | 249 output_encoding='utf-8', encoding_errors='replace') | 
| 208 mytemplate = mylookup.get_template("layout.txt") | 250 mytemplate = mylookup.get_template("layout.txt") | 
| 209 with open(groupsTxtFilePath, 'w') as groupsTxtFile: | 251 with open(groupsTxtFilePath, 'w') as groupsTxtFile: | 
| 210 # Write the content of groups.txt | 252 # Write the content of groups.txt | 
| 211 # groupsTxtFile.write('name map') | 253 # groupsTxtFile.write('name map') | 
