Mercurial > repos > jaredgk > ppp_vcfphase
comparison model.py @ 2:54c84f7dcb2c draft
Uploaded
| author | jaredgk |
|---|---|
| date | Wed, 17 Oct 2018 17:20:47 -0400 |
| parents | 3830d29fca6a |
| children |
comparison
equal
deleted
inserted
replaced
| 1:15245deda141 | 2:54c84f7dcb2c |
|---|---|
| 3 import json | 3 import json |
| 4 import subprocess | 4 import subprocess |
| 5 import argparse | 5 import argparse |
| 6 import logging | 6 import logging |
| 7 import itertools | 7 import itertools |
| 8 | 8 import copy |
| 9 from collections import defaultdict | 9 |
| 10 import numpy as np | |
| 11 | |
| 12 from collections import defaultdict, OrderedDict | |
| 10 | 13 |
| 11 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. | 14 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. |
| 12 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | 15 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) |
| 13 | 16 |
| 14 from logging_module import initLogger | 17 from logging_module import initLogger |
| 18 super(ModelFile, self).__init__(*arg, **kw) | 21 super(ModelFile, self).__init__(*arg, **kw) |
| 19 self.inds = [] | 22 self.inds = [] |
| 20 self.ind_file = '' | 23 self.ind_file = '' |
| 21 self.exclude_file = '' | 24 self.exclude_file = '' |
| 22 | 25 |
| 23 def assign_inds (self, inds = []): | 26 if arg and self.confirm_model_instance(arg[1]): |
| 24 # Return error if inds is empty | 27 self.update_inds(arg[1]) |
| 25 if not inds: | 28 |
| 26 raise IOError('No individuals found in the model file.') | 29 def __setitem__(self, *arg, **kw): |
| 30 super(ModelFile, self).__setitem__(*arg, **kw) | |
| 31 | |
| 32 if arg and self.confirm_model_instance(arg[1]): | |
| 33 self.update_inds(model = arg[1]) | |
| 34 | |
| 35 def __delitem__(self, key): | |
| 36 super(ModelFile, self).__delitem__(key) | |
| 37 self.update_inds() | |
| 38 | |
| 39 def confirm_model_instance (self, unknown): | |
| 40 | |
| 41 if isinstance(unknown, Model): | |
| 42 | |
| 43 return True | |
| 44 | |
| 45 else: | |
| 46 | |
| 47 return False | |
| 48 | |
| 49 def copy_model (self, src_model_name, new_model_name): | |
| 50 | |
| 51 src_model = super(ModelFile, self).__getitem__(src_model_name) | |
| 52 | |
| 53 src_model_copy = copy.deepcopy(src_model) | |
| 54 | |
| 55 src_model_copy.name = new_model_name | |
| 56 | |
| 57 super(ModelFile, self).__setitem__(new_model_name, src_model_copy) | |
| 58 | |
| 59 def rename_model (self, src_model_name, new_model_name): | |
| 60 | |
| 61 src_model = super(ModelFile, self).pop(src_model_name) | |
| 62 | |
| 63 src_model.name = new_model_name | |
| 64 | |
| 65 super(ModelFile, self).__setitem__(new_model_name, src_model) | |
| 66 | |
| 67 def update_inds (self, model = None): | |
| 68 | |
| 69 if self.confirm_model_instance(model): | |
| 70 | |
| 71 # Return error if inds is empty | |
| 72 if not model.inds: | |
| 73 raise IOError('No individuals found in %s.' % model.name) | |
| 74 | |
| 75 # Create a list of the unique individuals | |
| 76 unique_inds = list(set(self.inds + model.inds)) | |
| 77 | |
| 78 else: | |
| 79 | |
| 80 # Create an empty list for the unique individuals | |
| 81 unique_inds = [] | |
| 82 | |
| 83 # Loop the models in the file | |
| 84 for model_in_file in super(ModelFile, self).values(): | |
| 85 | |
| 86 # Create a list of the unique individuals | |
| 87 unique_inds = list(set(unique_inds + model_in_file.inds)) | |
| 88 | |
| 89 | |
| 27 # Store the individuals | 90 # Store the individuals |
| 28 self.inds = [str(ind) for ind in inds] | 91 self.inds = unique_inds |
| 29 | 92 |
| 30 def create_ind_file (self, file_ext = '', file_path = '', overwrite = False): | 93 def create_ind_file (self, file_ext = '', file_path = '', overwrite = False): |
| 31 # Assign the filename for the individuals file | 94 # Assign the filename for the individuals file |
| 32 ind_filename = 'unique_individuals' + file_ext | 95 ind_filename = 'unique_individuals' + file_ext |
| 33 | 96 |
| 82 ind_file.close() | 145 ind_file.close() |
| 83 | 146 |
| 84 # Save the individuals filename | 147 # Save the individuals filename |
| 85 self.exclude_file = ind_filename | 148 self.exclude_file = ind_filename |
| 86 | 149 |
| 87 def delete_ind_file (self): | 150 def delete_exclude_ind_file (self): |
| 88 # Check if an individuals file was created | 151 # Check if an individuals file was created |
| 89 if self.exclude_file: | 152 if self.exclude_file: |
| 90 | 153 |
| 91 # Delete the individuals file | 154 # Delete the individuals file |
| 92 os.remove(self.exclude_file) | 155 os.remove(self.exclude_file) |
| 93 | 156 |
| 94 # Remove the filename | 157 # Remove the filename |
| 95 self.exclude_file = '' | 158 self.exclude_file = '' |
| 159 | |
| 160 def to_json (self): | |
| 161 | |
| 162 model_file_json = [] | |
| 163 | |
| 164 for model_name, model_data in super(ModelFile, self).items(): | |
| 165 model_file_json.append(model_data.to_json()) | |
| 166 | |
| 167 return model_file_json | |
| 168 | |
| 96 | 169 |
| 97 class Model: | 170 class Model: |
| 98 def __init__ (self, name): | 171 def __init__ (self, name): |
| 99 self.name = name | 172 self.name = name |
| 100 self.tree = '' | 173 self.tree = '' |
| 101 self.npop = 0 | |
| 102 self.pop_list = [] | 174 self.pop_list = [] |
| 175 self.ind_dict = defaultdict(list) | |
| 103 self.nind = defaultdict(int) | 176 self.nind = defaultdict(int) |
| 104 self.ind_dict = defaultdict(list) | |
| 105 self.pop_files = [] | 177 self.pop_files = [] |
| 106 self.ind_file = '' | 178 self.ind_file = '' |
| 107 | 179 |
| 108 @property | 180 @property |
| 109 def inds(self): | 181 def npop (self): |
| 182 return len(self.pop_list) | |
| 183 | |
| 184 @property | |
| 185 def inds (self): | |
| 110 return list(itertools.chain.from_iterable(self.ind_dict.values())) | 186 return list(itertools.chain.from_iterable(self.ind_dict.values())) |
| 111 | 187 |
| 112 def assign_tree (self, tree): | 188 def assign_tree (self, tree): |
| 113 self.tree = str(tree) | 189 self.tree = str(tree) |
| 114 | 190 |
| 115 def assign_pop (self, pop, inds = []): | 191 def assign_pop (self, pop, inds = []): |
| 116 self.npop += 1 | |
| 117 self.pop_list.append(str(pop)) | 192 self.pop_list.append(str(pop)) |
| 118 if inds: | 193 if inds: |
| 119 self.nind[pop] = len(inds) | |
| 120 self.ind_dict[pop] = [str(ind) for ind in inds] | 194 self.ind_dict[pop] = [str(ind) for ind in inds] |
| 195 self.nind[pop] = len(self.ind_dict[pop]) | |
| 196 | |
| 197 def sample_pop (self, pop, sample_size, with_replacements = False): | |
| 198 | |
| 199 # Confirm the pop is in the model | |
| 200 if str(pop) not in self.pop_list: | |
| 201 | |
| 202 # Raise error if pop not found | |
| 203 raise Exception('%s not found' % pop) | |
| 204 | |
| 205 # Confirm the sample size is an int | |
| 206 try: | |
| 207 | |
| 208 sample_size = int(sample_size) | |
| 209 | |
| 210 except: | |
| 211 | |
| 212 # Raise error if sample_size not an int | |
| 213 raise Exception('%s not int' % sample_size) | |
| 214 | |
| 215 # Check if the sample size is larger than the pop | |
| 216 if int(sample_size) > self.nind[pop]: | |
| 217 | |
| 218 # Raise error if sample_size is larger | |
| 219 raise Exception('%s is larger than %s' % (sample_size, pop)) | |
| 220 | |
| 221 # Use numpy choice to randomly sample the pop | |
| 222 sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements) | |
| 223 | |
| 224 # Save the sampled inds as a list | |
| 225 self.ind_dict[pop] = list(sampled_inds) | |
| 226 | |
| 227 def sample_pops (self, sample_size, with_replacements = False): | |
| 228 | |
| 229 # Confirm the sample size is an int | |
| 230 try: | |
| 231 | |
| 232 sample_size = int(sample_size) | |
| 233 | |
| 234 except: | |
| 235 | |
| 236 # Raise error if sample_size not an int | |
| 237 raise Exception('%s not int' % sample_size) | |
| 238 | |
| 239 # Loop each pop in the pop list | |
| 240 for pop in self.pop_list: | |
| 241 | |
| 242 # Check if the sample size is larger than the pop | |
| 243 if int(sample_size) > self.nind[pop]: | |
| 244 | |
| 245 # Raise error if sample_size is larger | |
| 246 raise Exception('%s is larger than %s' % (sample_size, pop)) | |
| 247 | |
| 248 # Loop each pop in the pop list, if no error raised | |
| 249 for pop in self.pop_list: | |
| 250 | |
| 251 # Use numpy choice to randomly sample the pop | |
| 252 sampled_inds = np.random.choice(self.ind_dict[pop], sample_size, replace = with_replacements) | |
| 253 | |
| 254 # Save the sampled inds as a list | |
| 255 self.ind_dict[pop] = list(sampled_inds) | |
| 121 | 256 |
| 122 def create_pop_files (self, file_ext = '', file_path = '', overwrite = False): | 257 def create_pop_files (self, file_ext = '', file_path = '', overwrite = False): |
| 123 for pop in self.pop_list: | 258 for pop in self.pop_list: |
| 124 # Assign the filename for the population file | 259 # Assign the filename for the population file |
| 125 pop_filename = pop + file_ext | 260 pop_filename = pop + file_ext |
| 184 os.remove(self.ind_file) | 319 os.remove(self.ind_file) |
| 185 | 320 |
| 186 # Remove the filename | 321 # Remove the filename |
| 187 self.ind_file = '' | 322 self.ind_file = '' |
| 188 | 323 |
| 189 def read_model_file (model_filename): | 324 def to_json (self): |
| 325 | |
| 326 model_json = OrderedDict() | |
| 327 | |
| 328 model_json['name'] = self.name | |
| 329 | |
| 330 pop_json = OrderedDict() | |
| 331 | |
| 332 for pop in self.pop_list: | |
| 333 | |
| 334 pop_json[pop] = OrderedDict() | |
| 335 | |
| 336 pop_json[pop]['indv'] = self.ind_dict[pop] | |
| 337 | |
| 338 model_json['pops'] = pop_json | |
| 339 | |
| 340 return model_json | |
| 341 | |
| 342 def read_model_file (filename): | |
| 190 | 343 |
| 191 # Check that the file exists | 344 # Check that the file exists |
| 192 if not os.path.isfile(model_filename): | 345 if not os.path.isfile(filename): |
| 193 raise IOError | 346 raise IOError |
| 194 | 347 |
| 195 # Create ModelFile object | 348 # Create ModelFile object |
| 196 models_to_return = ModelFile() | 349 models_to_return = ModelFile() |
| 197 | 350 |
| 198 # Check if using python 2 or 3 | 351 # Check if using python 2 or 3 |
| 199 if sys.version_info[0] == 2: | 352 if sys.version_info[0] == 2: |
| 200 # Open the model file in python 2 | 353 # Open the model file in python 2 |
| 201 model_file = open(model_filename, 'rU') | 354 model_file = open(filename, 'rU') |
| 202 else: | 355 else: |
| 203 # Open the model file in python 3 | 356 # Open the model file in python 3 |
| 204 model_file = open(model_filename, 'r', newline=None) | 357 model_file = open(filename, 'r', newline=None) |
| 205 | 358 |
| 206 # Parse the model file using the json reader | 359 # Parse the model file using the json reader |
| 207 models_dict = json.load(model_file) | 360 models_dict = json.load(model_file) |
| 208 | 361 |
| 209 # List to store all unique individuals (i.e. individuals in all models) | 362 # List to store all unique individuals (i.e. individuals in all models) |
| 211 | 364 |
| 212 # Loop the parsed models | 365 # Loop the parsed models |
| 213 for model_dict in models_dict: | 366 for model_dict in models_dict: |
| 214 | 367 |
| 215 # Create the model | 368 # Create the model |
| 216 model = Model(model_dict['name']) | 369 model = Model(str(model_dict['name'])) |
| 217 | 370 |
| 218 # Loop the populations in the model | 371 # Loop the populations in the model |
| 219 for pop, pop_dict in model_dict['pops'].items(): | 372 for pop, pop_dict in model_dict['pops'].items(): |
| 220 | 373 |
| 374 # Convert all individual names to str | |
| 375 ind_list = [str(pop_ind) for pop_ind in pop_dict['inds']] | |
| 376 | |
| 221 # Assign the population and its individuals to the model | 377 # Assign the population and its individuals to the model |
| 222 model.assign_pop(pop, pop_dict['inds']) | 378 model.assign_pop(str(pop), ind_list) |
| 379 | |
| 223 # Assign the individuals to the unique individual list | 380 # Assign the individuals to the unique individual list |
| 224 individual_list.extend(pop_dict['inds']) | 381 individual_list.extend(ind_list) |
| 225 | 382 |
| 226 # Remove duplicates from the unique individual list | 383 # Remove duplicates from the unique individual list |
| 227 individual_list = list(set(individual_list)) | 384 individual_list = list(set(individual_list)) |
| 228 | 385 |
| 229 # Save the model | 386 # Save the model |
| 230 models_to_return[str(model.name)] = model | 387 models_to_return[str(model.name)] = model |
| 231 | 388 |
| 232 # Store the unique individuals within the ModelFile object | 389 logging.info('Finished reading model file (%s)' % filename) |
| 233 models_to_return.assign_inds(individual_list) | |
| 234 | 390 |
| 235 # Return the models | 391 # Return the models |
| 236 return models_to_return | 392 return models_to_return |
| 393 | |
| 394 def write_model_file (model_file, filename, overwrite = False): | |
| 395 | |
| 396 # Check if the file is to be overwritten | |
| 397 if not overwrite: | |
| 398 | |
| 399 # Check if the file exists | |
| 400 if os.path.exists(filename): | |
| 401 raise Exception('%s already exists' % filename) | |
| 402 | |
| 403 # Open the output file | |
| 404 output_file = open(filename, 'w') | |
| 405 | |
| 406 # Write the json-formatted data to the output file | |
| 407 output_file.write(json.dumps(model_file.to_json(), indent = 4)) | |
| 408 | |
| 409 # Close the output file | |
| 410 output_file.close() | |
| 411 | |
| 412 logging.info('Finished writing model file (%s)' % filename) |
