Mercurial > repos > rakesh4osdd > clsi_profile
comparison clsi_profile.py @ 4:2a5861818faf draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit 7169e85ac6d230af9309b0deacecd4dc61e0a915"
author | rakesh4osdd |
---|---|
date | Tue, 29 Jun 2021 12:15:06 +0000 |
parents | 683299422575 |
children | 3c27e5c2a8e9 |
comparison
equal
deleted
inserted
replaced
3:683299422575 | 4:2a5861818faf |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding: utf-8 | 2 # coding: utf-8 |
3 | 3 |
4 # In[206]: | 4 # In[115]: |
5 | 5 |
6 | 6 |
7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics | 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics |
8 # By rakesh4osdd@gmail.com, 06-Jun-2021 | 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 |
9 import pandas as pd | 9 import pandas as pd |
10 import re | 10 import re |
11 import sys | 11 import sys |
12 | 12 |
13 | 13 |
14 # In[207]: | 14 # In[116]: |
15 | 15 |
16 | 16 |
17 #print(pd.__version__, re.__version__) | 17 #print(pd.__version__, re.__version__) |
18 | 18 |
19 | 19 |
20 # In[208]: | 20 # In[117]: |
21 | 21 |
22 | 22 |
23 # compare two MIC value strings | 23 # compare two MIC value strings |
24 def check_mic(mic1,mic2,mic_type): | 24 def check_mic(mic1,mic2,mic_type): |
25 #print(mic1,mic2,mic_type) | 25 #print(mic1,mic2,mic_type) |
93 elif (mic_type=='r' and (float(m1a)>=float(m2a))): | 93 elif (mic_type=='r' and (float(m1a)>=float(m2a))): |
94 m_type='Resistant' | 94 m_type='Resistant' |
95 elif (mic_type=='i' and (float(m1a)==float(m2a))): | 95 elif (mic_type=='i' and (float(m1a)==float(m2a))): |
96 m_type='Intermediate' | 96 m_type='Intermediate' |
97 else: | 97 else: |
98 m_type='Strain could not be classified-1' | 98 m_type='Strain could not be classified' |
99 except IndexError: | 99 except IndexError: |
100 strain_type='Strain could not be classified-2' | 100 strain_type='Strain could not be classified' |
101 return(strain_type) | 101 return(strain_type) |
102 | 102 |
103 return(m_type) | 103 return(m_type) |
104 | 104 |
105 #check_mic('65','32-64','i') | 105 #check_mic('65','32-64','i') |
106 | 106 |
107 | 107 |
108 # In[209]: | 108 # In[118]: |
109 | 109 |
110 | 110 |
111 # compare MIC value in pandas list | 111 # compare MIC value in pandas list |
112 def sus_res_int(mic): | 112 def sus_res_int(mic): |
113 #print(mic) | 113 #print(mic) |
130 | 130 |
131 #mic=['128','16/4','128/4','32/4-64/4'] | 131 #mic=['128','16/4','128/4','32/4-64/4'] |
132 #sus_res_int(mic) | 132 #sus_res_int(mic) |
133 | 133 |
134 | 134 |
135 # In[210]: | 135 # In[119]: |
136 | 136 |
137 | 137 |
138 # for input argument | 138 # for input argument |
139 input_user = sys.argv[1] | 139 input_user = sys.argv[1] |
140 input_clsi = sys.argv[2] | 140 input_clsi = sys.argv[2] |
141 output_table = sys.argv[3] | 141 output_table = sys.argv[3] |
142 | 142 |
143 | 143 |
144 # In[211]: | 144 # In[3]: |
145 | 145 |
146 """ | 146 |
147 input_user='input.csv' | 147 """#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' |
148 input_clsi='clsi.csv' | 148 input_user='test-data/input2.csv' |
149 output_profile=input_user+'_profile.csv' | 149 input_clsi='test-data/clsi.csv' |
150 output_table=input_user+'_table.csv' | 150 output_profile='test-data/input2_profile.csv' |
151 """ | 151 output_table='test-data/input2_table.csv' |
152 | 152 #output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" |
153 # In[212]: | 153 |
154 | |
155 # In[146]: | |
154 | 156 |
155 | 157 |
156 # read user AST data with selected 3 columns | 158 # read user AST data with selected 3 columns |
157 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) | 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) |
158 | 160 #strain_mic |
159 | 161 |
160 # In[213]: | 162 |
163 # In[147]: | |
161 | 164 |
162 | 165 |
163 clsi_bp=pd.read_csv(input_clsi,sep=',') | 166 clsi_bp=pd.read_csv(input_clsi,sep=',') |
164 | 167 |
165 | 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape |
166 # In[214]: | 169 |
170 | |
171 # In[148]: | |
167 | 172 |
168 | 173 |
169 #clsi_bp | 174 #clsi_bp |
170 #strain_mic | 175 #strain_mic |
171 | 176 |
172 | 177 |
173 # In[215]: | 178 # In[149]: |
179 | |
180 | |
181 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] | |
182 if (input_dups.shape[0] == 0): | |
183 #print( "No duplicates") | |
184 pass | |
185 else: | |
186 input_dups.to_csv(output_table,na_rep='NA') | |
187 with open(output_table, "a") as file_object: | |
188 # Append the input-file error message at the end of the output file | |
189 file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file') | |
190 exit() | |
191 #input_dups.head() | |
192 | |
193 | |
194 # In[125]: | |
174 | 195 |
175 | 196 |
176 # convert MIC to numbers sMIC, rMIC | 197 # convert MIC to numbers sMIC, rMIC |
177 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
178 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
179 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
180 | 201 |
181 | 202 |
182 # In[216]: | 203 # In[126]: |
183 | 204 |
184 | 205 |
185 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
186 | 207 |
187 | 208 |
188 # In[217]: | 209 # In[127]: |
189 | 210 |
190 | 211 |
191 # Read only numbers in MIC values | 212 # Read only numbers in MIC values |
192 #try: | 213 #try: |
193 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) | 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) |
194 #except TypeError: | 215 #except TypeError: |
195 # print('Waring: Error in MIC value') | 216 # print('Waring: Error in MIC value') |
196 | 217 |
197 | 218 |
198 # In[218]: | 219 # In[128]: |
199 | 220 |
200 | 221 |
201 #strain_mic | 222 #strain_mic |
202 | 223 |
203 | 224 |
204 # In[219]: | 225 # In[129]: |
205 | 226 |
206 | 227 |
207 # capitalize each Antibiotic Name for comparison, removing whitespace | 228 # capitalize each Antibiotic Name for comparison, removing whitespace |
229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") | |
208 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") | 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") |
231 | |
209 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") | 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") |
210 | 233 |
211 | 234 |
212 # In[220]: | 235 # In[130]: |
213 | 236 |
214 | 237 |
215 #compare CLSI Antibiotics only | 238 #find duplicate values in input files |
216 #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']] | 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] |
217 try: | 240 if dups.shape[0] != 0: |
218 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] | 241 print ('Please provide a single MIC value in input file for given duplicates combination of \'Strain name and Antibiotics\' to use the tool:-\n',dups) |
219 except KeyError: | 242 #exit() |
220 print('Waring: Error in input Values') | 243 else: |
221 | 244 #compare CLSI Antibiotics only |
222 | 245 #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']] |
223 # In[221]: | 246 try: |
247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] | |
248 except KeyError: | |
249 print('Waring: Error in input Values') | |
250 | |
251 | |
252 # In[131]: | |
253 | |
254 | |
255 #result | |
256 | |
257 | |
258 # In[132]: | |
224 | 259 |
225 | 260 |
226 #compare MIC values and assign Susceptible and Resistant to Strain | 261 #compare MIC values and assign Susceptible and Resistant to Strain |
227 #try: | 262 #try: |
228 result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1) | 263 result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1) |
229 #except ValueError: | 264 #except ValueError: |
230 # print('Waring: Error in input MIC value') | 265 # print('Waring: Error in input MIC value') |
231 | 266 |
232 | 267 |
233 # In[222]: | 268 # In[133]: |
234 | 269 |
235 | 270 |
236 #result | 271 #result |
237 | 272 |
238 | 273 |
239 # In[223]: | 274 # In[134]: |
240 | 275 |
241 | 276 |
242 #result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig') | 277 #result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig') |
243 | 278 |
244 | 279 |
245 # In[224]: | 280 # In[135]: |
246 | 281 |
247 | 282 |
248 #create a pivot table for ASIST | 283 #create a pivot table for ASIST |
249 table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates() | 284 table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates() |
250 result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x)) | 285 result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x)) |
251 | 286 |
252 | 287 |
253 # In[225]: | 288 # In[136]: |
254 | 289 |
255 | 290 |
256 #result_table | 291 #result_table |
257 | 292 |
258 | 293 |
259 # In[226]: | 294 # In[137]: |
260 | 295 |
261 | 296 |
262 #result_table.to_csv(output_table,na_rep='NA') | 297 #result_table.to_csv(output_table,na_rep='NA') |
263 | 298 |
264 | 299 |
265 # In[227]: | 300 # In[138]: |
266 | 301 |
267 | 302 |
268 # reorder the Antibiotics for ASIST | 303 # reorder the Antibiotics for ASIST |
269 clsi_ab=['Amikacin','Tobramycin','Gentamycin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin', | 304 clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin', |
270 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime', | 305 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime', |
271 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ', | 306 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ', |
272 'Minocycline'] | 307 'Minocycline'] |
273 result_selected=result_table.filter(clsi_ab) | 308 result_selected=result_table.filter(clsi_ab) |
274 | 309 |
275 | 310 |
276 # In[228]: | 311 # In[139]: |
277 | 312 |
278 | 313 |
279 #print(result_selected.shape, result_table.shape) | 314 #print(result_selected.shape, result_table.shape) |
280 | 315 |
281 | 316 |
282 # In[229]: | 317 # In[140]: |
283 | 318 |
284 | 319 |
285 #result_selected.insert(0,'Resistance_phenotype','') | 320 #result_selected.insert(0,'Resistance_phenotype','') |
286 | 321 |
287 | 322 |
288 # In[230]: | 323 # In[141]: |
289 | 324 |
290 | 325 |
291 #rename headers | 326 #rename headers |
292 result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} ) | 327 result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} ) |
293 | 328 |
294 | 329 |
295 # In[231]: | 330 # In[142]: |
296 | 331 |
297 | 332 |
298 #result_selected | 333 #result_selected |
299 | 334 |
300 | 335 |
301 # In[232]: | 336 # In[144]: |
302 | 337 |
303 | 338 |
304 result_selected.to_csv(output_table,na_rep='NA') | 339 result_selected.to_csv(output_table,na_rep='NA') |
305 | 340 |