Mercurial > repos > rakesh4osdd > asist
comparison clsi_profile.py @ 0:c1a77856070c draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f5b374bef15145c893ffdd3a7d2f2978d8052184-dirty"
| author | rakesh4osdd |
|---|---|
| date | Sat, 26 Jun 2021 07:27:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c1a77856070c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # coding: utf-8 | |
| 3 | |
| 4 # In[206]: | |
| 5 | |
| 6 | |
| 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics | |
| 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 | |
| 9 import pandas as pd | |
| 10 import re | |
| 11 import sys | |
| 12 | |
| 13 | |
| 14 # In[207]: | |
| 15 | |
| 16 | |
| 17 #print(pd.__version__, re.__version__) | |
| 18 | |
| 19 | |
| 20 # In[208]: | |
| 21 | |
| 22 | |
| 23 # compare two MIC value strings | |
def check_mic(mic1, mic2, mic_type):
    """Compare an observed MIC string against one CLSI breakpoint string.

    Parameters
    ----------
    mic1 : str
        Observed MIC. Either a single number (``'8'``) or two numbers
        separated by ``/`` (``'8/4'``).
    mic2 : str
        Breakpoint. A single number (``'16'``), a ``/`` pair (``'16/4'``),
        a ``-`` range (``'32-64'``), or a range of pairs (``'32/4-64/4'``).
    mic_type : str
        ``'s'`` tests ``mic1 <= mic2``, ``'r'`` tests ``mic1 >= mic2`` and
        ``'i'`` tests equality, or inclusive range membership when ``mic2``
        contains ``-``.

    Returns
    -------
    str
        ``'Susceptible'``, ``'Resistant'`` or ``'Intermediate'`` when the
        test for ``mic_type`` succeeds, otherwise one of the
        ``'Strain could not be classified'`` strings (the ``-1`` and ``-2``
        suffixed variants are kept as they were for the single-value
        mismatch and the malformed-range cases respectively).
    """
    unclassified = 'Strain could not be classified'
    labels = {'s': 'Susceptible', 'r': 'Resistant', 'i': 'Intermediate'}

    def _satisfies(observed, limit):
        # Comparison selected by mic_type; only called for known types.
        if mic_type == 's':
            return observed <= limit
        if mic_type == 'r':
            return observed >= limit
        return observed == limit

    # --- observed value -------------------------------------------------
    try:
        if '/' in mic1:
            observed_parts = mic1.split('/')
            obs_a = float(observed_parts[0])
            obs_b = float(observed_parts[1])
        else:
            obs_a = float(mic1)
            # NOTE(review): when only the breakpoint is a '/' pair, the
            # second observed component is taken to be 1 - confirm this is
            # the intended convention for combination antibiotics.
            obs_b = 1.0
    except ValueError:
        return unclassified
    # A zero MIC is treated as "no usable measurement".
    if obs_a == 0 or obs_b == 0:
        return unclassified

    # --- breakpoint -----------------------------------------------------
    has_range = '-' in mic2
    has_pair = '/' in mic2
    try:
        if has_range and mic_type == 'i':
            range_parts = mic2.split('-')
            low_text, high_text = range_parts[0], range_parts[1]
            if has_pair:
                low_pair = low_text.split('/')
                high_pair = high_text.split('/')
                # Index every component before converting so a bound that
                # lacks its '/' half is reported as a malformed range.
                bound_texts = (low_pair[0], low_pair[1],
                               high_pair[0], high_pair[1])
                low_a, low_b, high_a, high_b = [float(t) for t in bound_texts]
                inside = (high_a >= obs_a >= low_a
                          and high_b >= obs_b >= low_b)
            else:
                low_a = float(low_text)
                high_a = float(high_text)
                inside = high_a >= obs_a >= low_a
            return 'Intermediate' if inside else unclassified

        if has_pair:
            if mic_type not in labels:
                return unclassified
            limit_parts = mic2.split('/')
            limit_a = float(limit_parts[0])
            limit_b = float(limit_parts[1])
            if _satisfies(obs_a, limit_a) and _satisfies(obs_b, limit_b):
                return labels[mic_type]
            return unclassified

        if has_range:
            # A range breakpoint is only meaningful for the 'i' test.
            return unclassified

        if mic_type in labels and _satisfies(obs_a, float(mic2)):
            return labels[mic_type]
        return 'Strain could not be classified-1'
    except IndexError:
        return 'Strain could not be classified-2'
    except ValueError:
        # Empty or non-numeric breakpoint: report it the same way as an
        # unparsable observed value instead of letting the error escape.
        return unclassified
| 104 | |
| 105 #check_mic('65','32-64','i') | |
| 106 | |
| 107 | |
| 108 # In[209]: | |
| 109 | |
| 110 | |
| 111 # compare MIC value in pandas list | |
def sus_res_int(mic):
    """Classify one observed MIC against its three CLSI breakpoints.

    Parameters
    ----------
    mic : sequence
        Four values in the order observed MIC, susceptible breakpoint,
        resistant breakpoint, intermediate breakpoint. Any iterable with at
        least four items works (list, tuple, pandas row).

    Returns
    -------
    str
        ``'Susceptible'``, ``'Resistant'`` or ``'Intermediate'`` for the
        first test that succeeds (tried in that order), otherwise
        ``'Strain could not be classified'``.
    """
    # Iterate instead of using mic[0]..mic[3] so a label-indexed pandas row
    # does not depend on positional fallback in Series.__getitem__.
    values = list(mic)
    # str() lets numeric cells through; spaces are removed as before.
    o_mic = str(values[0]).replace(' ', '')
    s_mic = str(values[1]).replace(' ', '')
    r_mic = str(values[2]).replace(' ', '')
    i_mic = str(values[3]).replace(' ', '')

    checks = (
        (s_mic, 's', 'Susceptible'),
        (r_mic, 'r', 'Resistant'),
        (i_mic, 'i', 'Intermediate'),
    )
    for breakpoint_text, mic_type, label in checks:
        try:
            if check_mic(o_mic, breakpoint_text, mic_type) == label:
                return label
        except ValueError:
            # One unparsable breakpoint must not prevent the remaining
            # categories from being tested.
            continue
    return 'Strain could not be classified'
| 130 | |
| 131 #mic=['128','16/4','128/4','32/4-64/4'] | |
| 132 #sus_res_int(mic) | |
| 133 | |
| 134 | |
| 135 # In[210]: | |
| 136 | |
| 137 | |
# Command-line arguments: user AST table, CLSI breakpoint table, output path.
if len(sys.argv) < 4:
    sys.exit('Usage: {} <input_user.csv> <input_clsi.csv> <output_table.csv>'.format(sys.argv[0]))
input_user = sys.argv[1]
input_clsi = sys.argv[2]
output_table = sys.argv[3]

# Read the user AST data, keeping only the three columns used below.
# na_filter=False keeps blank cells as empty strings instead of NaN.
strain_mic = pd.read_csv(input_user, sep=',',
                         usecols=['Strain name', 'Antibiotics', 'MIC'],
                         na_filter=False)

# Read the CLSI breakpoint table and make sure the columns used below exist.
clsi_bp = pd.read_csv(input_clsi, sep=',')
required_clsi_columns = ['Antibiotics', 'Susceptible', 'Resistant', 'Intermediate']
missing_clsi_columns = [name for name in required_clsi_columns
                        if name not in clsi_bp.columns]
if missing_clsi_columns:
    sys.exit('Error in input values: CLSI table is missing column(s) {}'.format(
        ', '.join(missing_clsi_columns)))

# Reduce breakpoint text to digits, '.', '/' and '-' (s_mic, r_mic, i_mic).
# astype(str) lets numeric or blank (NaN) cells through the regex; a NaN
# becomes 'nan' and is then stripped to an empty string.
breakpoint_junk = re.compile(r'[^0-9.\/-]')
for source_column, target_column in (('Susceptible', 's_mic'),
                                     ('Resistant', 'r_mic'),
                                     ('Intermediate', 'i_mic')):
    clsi_bp[target_column] = clsi_bp[source_column].astype(str).map(
        lambda text: breakpoint_junk.sub('', text))

# Reduce the observed MIC text to digits, '.' and '/'.
observed_junk = re.compile(r'[^0-9.\/]')
strain_mic['o_mic'] = strain_mic['MIC'].astype(str).map(
    lambda text: observed_junk.sub('', text))

# Normalise antibiotic names for comparison: capitalise and drop spaces.
strain_mic['Antibiotics'] = strain_mic['Antibiotics'].str.capitalize().str.replace(" ", "", regex=False)
clsi_bp['Antibiotics'] = clsi_bp['Antibiotics'].str.capitalize().str.replace(" ", "", regex=False)

# Keep only the antibiotics that appear in the CLSI table.
try:
    result = pd.merge(strain_mic, clsi_bp, on='Antibiotics', how='inner')[
        ['Strain name', 'Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic', 'i_mic']]
except KeyError as err:
    # Without the merged table nothing below can run, so stop here instead
    # of printing a warning and failing later on an undefined name.
    sys.exit('Error in input values: {}'.format(err))

if result.empty:
    sys.exit('Error in input values: no input antibiotic matches the CLSI table')

# Compare MIC values and assign a CLSI category to every row.
# Rows are passed as plain tuples and the result is assigned to a single
# column label, which avoids the list-key assignment of a Series.
mic_columns = ['o_mic', 's_mic', 'r_mic', 'i_mic']
result['CLSI_profile'] = [
    sus_res_int(row)
    for row in result[mic_columns].itertuples(index=False, name=None)
]

# Create a strain x antibiotic pivot table for ASIST; differing profiles for
# the same strain/antibiotic pair are joined with a space.
table = result[['Strain name', 'Antibiotics', 'CLSI_profile']].drop_duplicates()
result_table = pd.pivot_table(table, values='CLSI_profile',
                              index=['Strain name'], columns=['Antibiotics'],
                              aggfunc=lambda x: ' '.join(x))

# Select and order the antibiotics for ASIST. Names must match the
# normalised form above (capitalised, no spaces), so no entry may carry
# surrounding whitespace.
# NOTE(review): spellings such as 'Gentamycin' and 'Doxicycline' are kept as
# they were; confirm they match the names used in the CLSI table.
clsi_ab = ['Amikacin', 'Tobramycin', 'Gentamycin', 'Imipenem', 'Meropenem', 'Doripenem', 'Ciprofloxacin', 'Levofloxacin',
           'Piperacillin/tazobactam', 'Ticarcillin/clavulanicacid', 'Cefotaxime', 'Ceftriaxone', 'Ceftazidime', 'Cefepime',
           'Trimethoprim/sulfamethoxazole', 'Ampicillin/sulbactam', 'Colistin', 'Polymyxinb', 'Tetracycline', 'Doxicycline',
           'Minocycline']
result_selected = result_table.filter(items=clsi_ab)

# Restore the display names expected in the output header.
result_selected = result_selected.rename(columns={
    'Ticarcillin/clavulanicacid': 'Ticarcillin/clavulanic acid',
    'Piperacillin/tazobactam': 'Piperacillin/ tazobactam',
    'Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole',
    'Ampicillin/sulbactam': 'Ampicillin/ sulbactam',
    'Polymyxinb': 'Polymyxin B',
})

result_selected.to_csv(output_table, na_rep='NA')
| 305 |
