Mercurial > repos > jaredgk > ppp_vcfphase
comparison model.py @ 0:3830d29fca6a draft
Uploaded
| author | jaredgk |
|---|---|
| date | Mon, 15 Oct 2018 18:15:47 -0400 |
| parents | |
| children | 54c84f7dcb2c |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3830d29fca6a |
|---|---|
| 1 import os | |
| 2 import sys | |
| 3 import json | |
| 4 import subprocess | |
| 5 import argparse | |
| 6 import logging | |
| 7 import itertools | |
| 8 | |
| 9 from collections import defaultdict | |
| 10 | |
| 11 # Insert Jared's directory path, required for calling Jared's functions. Change when directory structure changes. | |
| 12 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | |
| 13 | |
| 14 from logging_module import initLogger | |
| 15 | |
class ModelFile(dict):
    """Dictionary of models plus the individuals found across those models.

    Besides the normal ``dict`` contents, an instance tracks:

    * ``inds``         -- list of individual names (stored as strings)
    * ``ind_file``     -- path of the file written by ``create_ind_file``
                          ('' when no such file is currently recorded)
    * ``exclude_file`` -- path of the file written by
                          ``create_exclude_ind_file`` ('' when none)
    """

    def __init__(self, *arg, **kw):
        super(ModelFile, self).__init__(*arg, **kw)
        self.inds = []
        self.ind_file = ''
        self.exclude_file = ''

    @staticmethod
    def _write_ind_list(filename, inds, overwrite):
        """Write one individual per line to `filename`.

        Raises IOError if the file already exists and `overwrite` is false.
        """
        # Refuse to clobber an existing file unless explicitly allowed
        if not overwrite and os.path.isfile(filename):
            raise IOError('Individuals file exists.')

        # The context manager closes the handle even if the write fails
        with open(filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(inds))

    def assign_inds(self, inds=None):
        """Store `inds` (each converted to str) as the known individuals.

        Raises IOError if `inds` is empty or None.
        """
        # Return error if inds is empty
        if not inds:
            raise IOError('No individuals found in the model file.')

        # Store the individuals
        self.inds = [str(ind) for ind in inds]

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write all stored individuals to 'unique_individuals' + `file_ext`.

        The file is created inside `file_path` when one is given, otherwise
        relative to the current directory. The resulting path is saved in
        `self.ind_file`. Raises IOError if the file already exists and
        `overwrite` is false.
        """
        # Assign the filename for the individuals file
        ind_filename = 'unique_individuals' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Create the individuals file
        self._write_ind_list(ind_filename, self.inds, overwrite)

        # Save the individuals filename
        self.ind_file = ind_filename

    def delete_ind_file(self):
        """Delete the individuals file, if one was created.

        NOTE: this method used to be defined twice in the class; the second
        definition (which removed the exclude file) silently replaced the
        first, so the individuals file was never deleted. The individuals
        file is now removed here. The exclude file is removed as well so
        that callers relying on the previous behaviour keep working; use
        `delete_exclude_ind_file` to remove only the exclude file.
        """
        # Check if an individuals file was created
        if self.ind_file:
            # Delete the individuals file and forget its name
            os.remove(self.ind_file)
            self.ind_file = ''

        # Backward compatibility: see the note in the docstring
        self.delete_exclude_ind_file()

    def create_exclude_ind_file(self, inds_to_include=None, file_ext='',
                                file_path='', overwrite=False):
        """Write the stored individuals that are NOT in `inds_to_include`.

        The file is named 'exclude_individuals' + `file_ext` and is created
        inside `file_path` when one is given. The resulting path is saved in
        `self.exclude_file`. Raises IOError if the file already exists and
        `overwrite` is false.
        """
        # Assign the filename for the exclude file
        ind_filename = 'exclude_individuals' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Create exclude list by removing included individuals. Sorting makes
        # the file contents reproducible (plain set order is arbitrary).
        exclude_inds = sorted(set(self.inds) - set(inds_to_include or ()))

        # Create the exclude file
        self._write_ind_list(ind_filename, exclude_inds, overwrite)

        # Save the exclude filename
        self.exclude_file = ind_filename

    def delete_exclude_ind_file(self):
        """Delete the exclude file, if one was created."""
        # Check if an exclude file was created
        if self.exclude_file:
            # Delete the exclude file and forget its name
            os.remove(self.exclude_file)
            self.exclude_file = ''
| 96 | |
class Model(object):
    """A named model: a set of populations and the individuals in each.

    Attributes:
        name      -- model name as given to the constructor
        tree      -- tree string set by `assign_tree` ('' until assigned)
        npop      -- number of `assign_pop` calls made so far
        pop_list  -- population names (as strings) in assignment order
        nind      -- population name -> number of individuals
        ind_dict  -- population name -> list of individual names (strings)
        pop_files -- paths of files written by `create_pop_files`
        ind_file  -- path of the file written by `create_ind_file` ('' if none)
    """

    def __init__(self, name):
        self.name = name
        self.tree = ''
        self.npop = 0
        self.pop_list = []
        self.nind = defaultdict(int)
        self.ind_dict = defaultdict(list)
        self.pop_files = []
        self.ind_file = ''

    @property
    def inds(self):
        """Flat list of the individuals of every population in the model."""
        return list(itertools.chain.from_iterable(self.ind_dict.values()))

    def assign_tree(self, tree):
        """Store `tree` (converted to str) as the model's tree."""
        self.tree = str(tree)

    def assign_pop(self, pop, inds=None):
        """Register population `pop`, optionally with its individuals.

        The population name is converted to str once and that same string is
        used for `pop_list`, `nind` and `ind_dict`, so later lookups through
        `pop_list` (e.g. in `create_pop_files`) always find the individuals.
        """
        pop = str(pop)
        self.npop += 1
        self.pop_list.append(pop)
        if inds:
            self.nind[pop] = len(inds)
            self.ind_dict[pop] = [str(ind) for ind in inds]

    def create_pop_files(self, file_ext='', file_path='', overwrite=False):
        """Write one file per population, listing one individual per line.

        Each file is named <population> + `file_ext` and is created inside
        `file_path` when one is given. The paths are recorded in
        `self.pop_files`. Raises IOError if a file already exists and
        `overwrite` is false.
        """
        for pop in self.pop_list:
            # Assign the filename for the population file
            pop_filename = pop + file_ext

            # If a path is assigned, create the file at the specified location
            if file_path:
                pop_filename = os.path.join(file_path, pop_filename)

            # Refuse to clobber an existing file unless explicitly allowed
            if not overwrite and os.path.isfile(pop_filename):
                raise IOError('Population file exists.')

            # Create the population file. `.get` avoids inserting an empty
            # entry into the defaultdict for populations without individuals.
            with open(pop_filename, 'w') as pop_file:
                pop_file.write('%s\n' % '\n'.join(self.ind_dict.get(pop, [])))

            # Save the population filename once, so that rewriting with
            # overwrite=True does not make delete_pop_files remove it twice
            if pop_filename not in self.pop_files:
                self.pop_files.append(pop_filename)

    def delete_pop_files(self):
        """Delete every recorded population file and clear the record."""
        # Loop the created pop files (no-op when none were created)
        for pop_file in self.pop_files:
            # Delete the pop file
            os.remove(pop_file)

        # Remove the filenames
        self.pop_files = []

    def create_ind_file(self, file_ext='', file_path='', overwrite=False):
        """Write all individuals of the model to 'individual.keep' + `file_ext`.

        The file is created inside `file_path` when one is given. The path is
        saved in `self.ind_file`. Raises IOError if the file already exists
        and `overwrite` is false.
        """
        # Assign the filename for the individuals file
        ind_filename = 'individual.keep' + file_ext

        # If a path is assigned, create the file at the specified location
        if file_path:
            ind_filename = os.path.join(file_path, ind_filename)

        # Refuse to clobber an existing file unless explicitly allowed
        if not overwrite and os.path.isfile(ind_filename):
            raise IOError('Individuals file exists.')

        # Create the individuals file
        with open(ind_filename, 'w') as ind_file:
            ind_file.write('%s\n' % '\n'.join(self.inds))

        # Save the individuals filename
        self.ind_file = ind_filename

    def delete_ind_file(self):
        """Delete the individuals file, if one was created."""
        # Check if an individuals file was created
        if self.ind_file:
            # Delete the individuals file
            os.remove(self.ind_file)

            # Remove the filename
            self.ind_file = ''
| 188 | |
def read_model_file(model_filename):
    """Parse a JSON model file into a ModelFile of Model objects.

    The JSON document is iterated as a sequence of model entries; each entry
    is read through its 'name' key and its 'pops' mapping, whose values
    provide an 'inds' list. Every population is added to its Model, and the
    individuals of all models (duplicates removed, first-seen order kept)
    are stored on the returned ModelFile.

    Returns:
        ModelFile mapping str(model name) -> Model.

    Raises:
        IOError: if `model_filename` is not an existing file, or (raised by
            ModelFile.assign_inds) if no individuals were found.
    """
    # Check that the file exists
    if not os.path.isfile(model_filename):
        raise IOError('Model file not found: %s' % model_filename)

    # Python 2 needs 'rU' for universal newlines; in Python 3 that is the
    # default behaviour of text mode and the 'U' flag is no longer accepted
    read_mode = 'rU' if sys.version_info[0] == 2 else 'r'

    # Parse the model file using the json reader; the context manager makes
    # sure the handle is closed even if parsing fails
    with open(model_filename, read_mode) as model_file:
        models_dict = json.load(model_file)

    # Create ModelFile object
    models_to_return = ModelFile()

    # Unique individuals (i.e. individuals in all models), in first-seen order
    individual_list = []
    seen_inds = set()

    # Loop the parsed models
    for model_dict in models_dict:

        # Create the model
        model = Model(model_dict['name'])

        # Loop the populations in the model
        for pop, pop_dict in model_dict['pops'].items():

            # Assign the population and its individuals to the model
            model.assign_pop(pop, pop_dict['inds'])

            # Add individuals not seen before to the unique individual list
            for ind in pop_dict['inds']:
                if ind not in seen_inds:
                    seen_inds.add(ind)
                    individual_list.append(ind)

        # Save the model
        models_to_return[str(model.name)] = model

    # Store the unique individuals within the ModelFile object
    models_to_return.assign_inds(individual_list)

    # Return the models
    return models_to_return
