Mercurial > repos > rakesh4osdd > clsi_profile
comparison clsi_profile.py @ 4:2a5861818faf draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit 7169e85ac6d230af9309b0deacecd4dc61e0a915"
| author | rakesh4osdd |
|---|---|
| date | Tue, 29 Jun 2021 12:15:06 +0000 |
| parents | 683299422575 |
| children | 3c27e5c2a8e9 |
comparison
equal
deleted
inserted
replaced
| 3:683299422575 | 4:2a5861818faf |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding: utf-8 | 2 # coding: utf-8 |
| 3 | 3 |
| 4 # In[206]: | 4 # In[115]: |
| 5 | 5 |
| 6 | 6 |
| 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics | 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics |
| 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 | 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 |
| 9 import pandas as pd | 9 import pandas as pd |
| 10 import re | 10 import re |
| 11 import sys | 11 import sys |
| 12 | 12 |
| 13 | 13 |
| 14 # In[207]: | 14 # In[116]: |
| 15 | 15 |
| 16 | 16 |
| 17 #print(pd.__version__, re.__version__) | 17 #print(pd.__version__, re.__version__) |
| 18 | 18 |
| 19 | 19 |
| 20 # In[208]: | 20 # In[117]: |
| 21 | 21 |
| 22 | 22 |
| 23 # compare two MIC value strings | 23 # compare two MIC value strings |
| 24 def check_mic(mic1,mic2,mic_type): | 24 def check_mic(mic1,mic2,mic_type): |
| 25 #print(mic1,mic2,mic_type) | 25 #print(mic1,mic2,mic_type) |
| 93 elif (mic_type=='r' and (float(m1a)>=float(m2a))): | 93 elif (mic_type=='r' and (float(m1a)>=float(m2a))): |
| 94 m_type='Resistant' | 94 m_type='Resistant' |
| 95 elif (mic_type=='i' and (float(m1a)==float(m2a))): | 95 elif (mic_type=='i' and (float(m1a)==float(m2a))): |
| 96 m_type='Intermediate' | 96 m_type='Intermediate' |
| 97 else: | 97 else: |
| 98 m_type='Strain could not be classified-1' | 98 m_type='Strain could not be classified' |
| 99 except IndexError: | 99 except IndexError: |
| 100 strain_type='Strain could not be classified-2' | 100 strain_type='Strain could not be classified' |
| 101 return(strain_type) | 101 return(strain_type) |
| 102 | 102 |
| 103 return(m_type) | 103 return(m_type) |
| 104 | 104 |
| 105 #check_mic('65','32-64','i') | 105 #check_mic('65','32-64','i') |
| 106 | 106 |
| 107 | 107 |
| 108 # In[209]: | 108 # In[118]: |
| 109 | 109 |
| 110 | 110 |
| 111 # compare MIC value in pandas list | 111 # compare MIC value in pandas list |
| 112 def sus_res_int(mic): | 112 def sus_res_int(mic): |
| 113 #print(mic) | 113 #print(mic) |
| 130 | 130 |
| 131 #mic=['128','16/4','128/4','32/4-64/4'] | 131 #mic=['128','16/4','128/4','32/4-64/4'] |
| 132 #sus_res_int(mic) | 132 #sus_res_int(mic) |
| 133 | 133 |
| 134 | 134 |
| 135 # In[210]: | 135 # In[119]: |
| 136 | 136 |
| 137 | 137 |
| 138 # for input argument | 138 # for input argument |
| 139 input_user = sys.argv[1] | 139 input_user = sys.argv[1] |
| 140 input_clsi = sys.argv[2] | 140 input_clsi = sys.argv[2] |
| 141 output_table = sys.argv[3] | 141 output_table = sys.argv[3] |
| 142 | 142 |
| 143 | 143 |
| 144 # In[211]: | 144 # In[3]: |
| 145 | 145 |
| 146 """ | 146 |
| 147 input_user='input.csv' | 147 """#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' |
| 148 input_clsi='clsi.csv' | 148 input_user='test-data/input2.csv' |
| 149 output_profile=input_user+'_profile.csv' | 149 input_clsi='test-data/clsi.csv' |
| 150 output_table=input_user+'_table.csv' | 150 output_profile='test-data/input2_profile.csv' |
| 151 """ | 151 output_table='test-data/input2_table.csv' |
| 152 | 152 #output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" |
| 153 # In[212]: | 153 |
| 154 | |
| 155 # In[146]: | |
| 154 | 156 |
| 155 | 157 |
| 156 # read user AST data with selected 3 columns | 158 # read user AST data with selected 3 columns |
| 157 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) | 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) |
| 158 | 160 #strain_mic |
| 159 | 161 |
| 160 # In[213]: | 162 |
| 163 # In[147]: | |
| 161 | 164 |
| 162 | 165 |
| 163 clsi_bp=pd.read_csv(input_clsi,sep=',') | 166 clsi_bp=pd.read_csv(input_clsi,sep=',') |
| 164 | 167 |
| 165 | 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape |
| 166 # In[214]: | 169 |
| 170 | |
| 171 # In[148]: | |
| 167 | 172 |
| 168 | 173 |
| 169 #clsi_bp | 174 #clsi_bp |
| 170 #strain_mic | 175 #strain_mic |
| 171 | 176 |
| 172 | 177 |
| 173 # In[215]: | 178 # In[149]: |
| 179 | |
| 180 | |
| 181 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] | |
| 182 if (input_dups.shape[0] == 0): | |
| 183 #print( "No duplicates") | |
| 184 pass | |
| 185 else: | |
| 186 input_dups.to_csv(output_table,na_rep='NA') | |
| 187 with open(output_table, "a") as file_object: | |
| 188 # Append the input-file error message at the end of the output file | |
| 189 file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file') | |
| 190 exit() | |
| 191 #input_dups.head() | |
| 192 | |
| 193 | |
| 194 # In[125]: | |
| 174 | 195 |
| 175 | 196 |
| 176 # convert MIC to numbers sMIC, rMIC | 197 # convert MIC to numbers sMIC, rMIC |
| 177 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 178 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 179 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 180 | 201 |
| 181 | 202 |
| 182 # In[216]: | 203 # In[126]: |
| 183 | 204 |
| 184 | 205 |
| 185 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 186 | 207 |
| 187 | 208 |
| 188 # In[217]: | 209 # In[127]: |
| 189 | 210 |
| 190 | 211 |
| 191 # Read only numbers in MIC values | 212 # Read only numbers in MIC values |
| 192 #try: | 213 #try: |
| 193 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) | 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) |
| 194 #except TypeError: | 215 #except TypeError: |
| 195 # print('Waring: Error in MIC value') | 216 # print('Waring: Error in MIC value') |
| 196 | 217 |
| 197 | 218 |
| 198 # In[218]: | 219 # In[128]: |
| 199 | 220 |
| 200 | 221 |
| 201 #strain_mic | 222 #strain_mic |
| 202 | 223 |
| 203 | 224 |
| 204 # In[219]: | 225 # In[129]: |
| 205 | 226 |
| 206 | 227 |
| 207 # capitalize each Antibiotic Name and remove whitespace for comparison | 228 # capitalize each Antibiotic Name and remove whitespace for comparison |
| 229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") | |
| 208 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") | 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") |
| 231 | |
| 209 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") | 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") |
| 210 | 233 |
| 211 | 234 |
| 212 # In[220]: | 235 # In[130]: |
| 213 | 236 |
| 214 | 237 |
| 215 #compare CLSI Antibiotics only | 238 #find duplicate values in input files |
| 216 #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']] | 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] |
| 217 try: | 240 if dups.shape[0] != 0: |
| 218 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] | 241 print ('Please provide a single MIC value in input file for given duplicates combination of \'Strain name and Antibiotics\' to use the tool:-\n',dups) |
| 219 except KeyError: | 242 #exit() |
| 220 print('Waring: Error in input Values') | 243 else: |
| 221 | 244 #compare CLSI Antibiotics only |
| 222 | 245 #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']] |
| 223 # In[221]: | 246 try: |
| 247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] | |
| 248 except KeyError: | |
| 249 print('Waring: Error in input Values') | |
| 250 | |
| 251 | |
| 252 # In[131]: | |
| 253 | |
| 254 | |
| 255 #result | |
| 256 | |
| 257 | |
| 258 # In[132]: | |
| 224 | 259 |
| 225 | 260 |
| 226 #compare MIC values and assign Susceptible and Resistant to Strain | 261 #compare MIC values and assign Susceptible and Resistant to Strain |
| 227 #try: | 262 #try: |
| 228 result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1) | 263 result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1) |
| 229 #except ValueError: | 264 #except ValueError: |
| 230 # print('Waring: Error in input MIC value') | 265 # print('Waring: Error in input MIC value') |
| 231 | 266 |
| 232 | 267 |
| 233 # In[222]: | 268 # In[133]: |
| 234 | 269 |
| 235 | 270 |
| 236 #result | 271 #result |
| 237 | 272 |
| 238 | 273 |
| 239 # In[223]: | 274 # In[134]: |
| 240 | 275 |
| 241 | 276 |
| 242 #result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig') | 277 #result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig') |
| 243 | 278 |
| 244 | 279 |
| 245 # In[224]: | 280 # In[135]: |
| 246 | 281 |
| 247 | 282 |
| 248 #create a pivot table for ASIST | 283 #create a pivot table for ASIST |
| 249 table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates() | 284 table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates() |
| 250 result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x)) | 285 result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x)) |
| 251 | 286 |
| 252 | 287 |
| 253 # In[225]: | 288 # In[136]: |
| 254 | 289 |
| 255 | 290 |
| 256 #result_table | 291 #result_table |
| 257 | 292 |
| 258 | 293 |
| 259 # In[226]: | 294 # In[137]: |
| 260 | 295 |
| 261 | 296 |
| 262 #result_table.to_csv(output_table,na_rep='NA') | 297 #result_table.to_csv(output_table,na_rep='NA') |
| 263 | 298 |
| 264 | 299 |
| 265 # In[227]: | 300 # In[138]: |
| 266 | 301 |
| 267 | 302 |
| 268 # reorder the Antibiotics for ASIST | 303 # reorder the Antibiotics for ASIST |
| 269 clsi_ab=['Amikacin','Tobramycin','Gentamycin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin', | 304 clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin', |
| 270 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime', | 305 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime', |
| 271 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ', | 306 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ', |
| 272 'Minocycline'] | 307 'Minocycline'] |
| 273 result_selected=result_table.filter(clsi_ab) | 308 result_selected=result_table.filter(clsi_ab) |
| 274 | 309 |
| 275 | 310 |
| 276 # In[228]: | 311 # In[139]: |
| 277 | 312 |
| 278 | 313 |
| 279 #print(result_selected.shape, result_table.shape) | 314 #print(result_selected.shape, result_table.shape) |
| 280 | 315 |
| 281 | 316 |
| 282 # In[229]: | 317 # In[140]: |
| 283 | 318 |
| 284 | 319 |
| 285 #result_selected.insert(0,'Resistance_phenotype','') | 320 #result_selected.insert(0,'Resistance_phenotype','') |
| 286 | 321 |
| 287 | 322 |
| 288 # In[230]: | 323 # In[141]: |
| 289 | 324 |
| 290 | 325 |
| 291 #rename headers | 326 #rename headers |
| 292 result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} ) | 327 result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} ) |
| 293 | 328 |
| 294 | 329 |
| 295 # In[231]: | 330 # In[142]: |
| 296 | 331 |
| 297 | 332 |
| 298 #result_selected | 333 #result_selected |
| 299 | 334 |
| 300 | 335 |
| 301 # In[232]: | 336 # In[144]: |
| 302 | 337 |
| 303 | 338 |
| 304 result_selected.to_csv(output_table,na_rep='NA') | 339 result_selected.to_csv(output_table,na_rep='NA') |
| 305 | 340 |
