comparison clsi_profile.py @ 4:2a5861818faf draft

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit 7169e85ac6d230af9309b0deacecd4dc61e0a915"
author rakesh4osdd
date Tue, 29 Jun 2021 12:15:06 +0000
parents 683299422575
children 3c27e5c2a8e9
comparison
equal deleted inserted replaced
3:683299422575 4:2a5861818faf
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # In[206]: 4 # In[115]:
5 5
6 6
7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics
8 # By rakesh4osdd@gmail.com, 06-Jun-2021 8 # By rakesh4osdd@gmail.com, 06-Jun-2021
9 import pandas as pd 9 import pandas as pd
10 import re 10 import re
11 import sys 11 import sys
12 12
13 13
14 # In[207]: 14 # In[116]:
15 15
16 16
17 #print(pd.__version__, re.__version__) 17 #print(pd.__version__, re.__version__)
18 18
19 19
20 # In[208]: 20 # In[117]:
21 21
22 22
23 # compare two MIC value strings 23 # compare two MIC value strings
24 def check_mic(mic1,mic2,mic_type): 24 def check_mic(mic1,mic2,mic_type):
25 #print(mic1,mic2,mic_type) 25 #print(mic1,mic2,mic_type)
93 elif (mic_type=='r' and (float(m1a)>=float(m2a))): 93 elif (mic_type=='r' and (float(m1a)>=float(m2a))):
94 m_type='Resistant' 94 m_type='Resistant'
95 elif (mic_type=='i' and (float(m1a)==float(m2a))): 95 elif (mic_type=='i' and (float(m1a)==float(m2a))):
96 m_type='Intermediate' 96 m_type='Intermediate'
97 else: 97 else:
98 m_type='Strain could not be classified-1' 98 m_type='Strain could not be classified'
99 except IndexError: 99 except IndexError:
100 strain_type='Strain could not be classified-2' 100 strain_type='Strain could not be classified'
101 return(strain_type) 101 return(strain_type)
102 102
103 return(m_type) 103 return(m_type)
104 104
105 #check_mic('65','32-64','i') 105 #check_mic('65','32-64','i')
106 106
107 107
108 # In[209]: 108 # In[118]:
109 109
110 110
111 # compare MIC value in pandas list 111 # compare MIC value in pandas list
112 def sus_res_int(mic): 112 def sus_res_int(mic):
113 #print(mic) 113 #print(mic)
130 130
131 #mic=['128','16/4','128/4','32/4-64/4'] 131 #mic=['128','16/4','128/4','32/4-64/4']
132 #sus_res_int(mic) 132 #sus_res_int(mic)
133 133
134 134
135 # In[210]: 135 # In[119]:
136 136
137 137
138 # for input argument 138 # for input argument
139 input_user = sys.argv[1] 139 input_user = sys.argv[1]
140 input_clsi = sys.argv[2] 140 input_clsi = sys.argv[2]
141 output_table = sys.argv[3] 141 output_table = sys.argv[3]
142 142
143 143
144 # In[211]: 144 # In[3]:
145 145
146 """ 146
147 input_user='input.csv' 147 """#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'
148 input_clsi='clsi.csv' 148 input_user='test-data/input2.csv'
149 output_profile=input_user+'_profile.csv' 149 input_clsi='test-data/clsi.csv'
150 output_table=input_user+'_table.csv' 150 output_profile='test-data/input2_profile.csv'
151 """ 151 output_table='test-data/input2_table.csv'
152 152 #output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'"""
153 # In[212]: 153
154
155 # In[146]:
154 156
155 157
156 # read user AST data with selected 3 columns 158 # read user AST data with selected 3 columns
157 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False)
158 160 #strain_mic
159 161
160 # In[213]: 162
163 # In[147]:
161 164
162 165
163 clsi_bp=pd.read_csv(input_clsi,sep=',') 166 clsi_bp=pd.read_csv(input_clsi,sep=',')
164 167
165 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape
166 # In[214]: 169
170
171 # In[148]:
167 172
168 173
169 #clsi_bp 174 #clsi_bp
170 #strain_mic 175 #strain_mic
171 176
172 177
173 # In[215]: 178 # In[149]:
179
180
181 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]
182 if (input_dups.shape[0] == 0):
183 #print( "No duplicates")
184 pass
185 else:
186 input_dups.to_csv(output_table,na_rep='NA')
187 with open(output_table, "a") as file_object:
188 # Append the input-file error message at the end of the file
189 file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file')
190 exit()
191 #input_dups.head()
192
193
194 # In[125]:
174 195
175 196
176 # convert MIC to numbers sMIC, rMIC 197 # convert MIC to numbers sMIC, rMIC
177 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
178 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
179 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
180 201
181 202
182 # In[216]: 203 # In[126]:
183 204
184 205
185 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
186 207
187 208
188 # In[217]: 209 # In[127]:
189 210
190 211
191 # Read only numbers in MIC values 212 # Read only numbers in MIC values
192 #try: 213 #try:
193 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x)))
194 #except TypeError: 215 #except TypeError:
195 # print('Waring: Error in MIC value') 216 # print('Waring: Error in MIC value')
196 217
197 218
198 # In[218]: 219 # In[128]:
199 220
200 221
201 #strain_mic 222 #strain_mic
202 223
203 224
204 # In[219]: 225 # In[129]:
205 226
206 227
207 # capitalize each Antibiotic Name and remove whitespace for comparison 228 # capitalize each Antibiotic Name and remove whitespace for comparison
229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","")
208 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","")
231
209 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","")
210 233
211 234
212 # In[220]: 235 # In[130]:
213 236
214 237
215 #compare CLSI Antibiotics only 238 #find duplicate values in input files
216 #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']] 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]
217 try: 240 if dups.shape[0] != 0:
218 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] 241 print ('Please provide a single MIC value in input file for given duplicates combination of \'Strain name and Antibiotics\' to use the tool:-\n',dups)
219 except KeyError: 242 #exit()
220 print('Waring: Error in input Values') 243 else:
221 244 #compare CLSI Antibiotics only
222 245 #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']]
223 # In[221]: 246 try:
247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']]
248 except KeyError:
249 print('Waring: Error in input Values')
250
251
252 # In[131]:
253
254
255 #result
256
257
258 # In[132]:
224 259
225 260
226 #compare MIC values and assign Susceptible and Resistant to Strain 261 #compare MIC values and assign Susceptible and Resistant to Strain
227 #try: 262 #try:
228 result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1) 263 result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1)
229 #except ValueError: 264 #except ValueError:
230 # print('Waring: Error in input MIC value') 265 # print('Waring: Error in input MIC value')
231 266
232 267
233 # In[222]: 268 # In[133]:
234 269
235 270
236 #result 271 #result
237 272
238 273
239 # In[223]: 274 # In[134]:
240 275
241 276
242 #result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig') 277 #result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig')
243 278
244 279
245 # In[224]: 280 # In[135]:
246 281
247 282
248 #create a pivot table for ASIST 283 #create a pivot table for ASIST
249 table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates() 284 table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates()
250 result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x)) 285 result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x))
251 286
252 287
253 # In[225]: 288 # In[136]:
254 289
255 290
256 #result_table 291 #result_table
257 292
258 293
259 # In[226]: 294 # In[137]:
260 295
261 296
262 #result_table.to_csv(output_table,na_rep='NA') 297 #result_table.to_csv(output_table,na_rep='NA')
263 298
264 299
265 # In[227]: 300 # In[138]:
266 301
267 302
268 # reorder the Antibiotics for ASIST 303 # reorder the Antibiotics for ASIST
269 clsi_ab=['Amikacin','Tobramycin','Gentamycin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin', 304 clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin',
270 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime', 305 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime',
271 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ', 306 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ',
272 'Minocycline'] 307 'Minocycline']
273 result_selected=result_table.filter(clsi_ab) 308 result_selected=result_table.filter(clsi_ab)
274 309
275 310
276 # In[228]: 311 # In[139]:
277 312
278 313
279 #print(result_selected.shape, result_table.shape) 314 #print(result_selected.shape, result_table.shape)
280 315
281 316
282 # In[229]: 317 # In[140]:
283 318
284 319
285 #result_selected.insert(0,'Resistance_phenotype','') 320 #result_selected.insert(0,'Resistance_phenotype','')
286 321
287 322
288 # In[230]: 323 # In[141]:
289 324
290 325
291 #rename headers 326 #rename headers
292 result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} ) 327 result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )
293 328
294 329
295 # In[231]: 330 # In[142]:
296 331
297 332
298 #result_selected 333 #result_selected
299 334
300 335
301 # In[232]: 336 # In[144]:
302 337
303 338
304 result_selected.to_csv(output_table,na_rep='NA') 339 result_selected.to_csv(output_table,na_rep='NA')
305 340