Mercurial > repos > rakesh4osdd > clsi_profile
comparison asist_dynamic.py @ 9:b3c01b790314 draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8"
| author | rakesh4osdd |
|---|---|
| date | Wed, 30 Jun 2021 06:37:12 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 8:c89ee0059c70 | 9:b3c01b790314 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # coding: utf-8 | |
| 3 | |
| 4 # In[1309]: | |
| 5 | |
| 6 | |
| 7 #ASIST program for phenotype based on Antibiotics profile | |
| 8 # create a profile based on selected antibiotics only | |
| 9 # rakesh4osdd@gmail.com, 14-June-2021 | |
| 10 | |
| 11 | |
| 12 # In[1]: | |
| 13 | |
| 14 | |
| 15 import pandas as pd | |
| 16 import sys | |
| 17 import os | |
| 18 from collections import Counter | |
| 19 | |
| 20 | |
| 21 # In[176]: | |
| 22 | |
| 23 | |
# Command-line arguments: first the input CSV path, then the output CSV path.
input_file, output_file = sys.argv[1], sys.argv[2]
| 26 #input_file='test-data/asist_input.csv' | |
| 27 #output_file='test-data/asist_output.csv' | |
| 28 | |
| 29 | |
| 30 # In[177]: | |
| 31 | |
| 32 | |
| 33 # strain_profile to phenotype condition | |
def s_phen(sus, res, intm, na, pb_sus):
    """Map per-strain category counts to a phenotype label.

    Parameters
    ----------
    sus, res, intm, na : int
        Counts of antibiotic categories called susceptible, resistant,
        intermediate and not-available respectively (as tallied by
        s_profiler).
    pb_sus : int
        1 when Polymyxin B is susceptible for the strain, otherwise 0.

    Returns
    -------
    str
        One of 'Possible Susceptible', 'Possible MDR', 'Possible XDR',
        'Possible TDR', or 'Strain could not be classified (<intm> | <na>)'.
    """
    # Every classifying rule requires a non-negative NA count, so test it once.
    if na >= 0:
        # No resistant category and at least one susceptible category.
        if sus > 0 and res == 0:
            return('Possible Susceptible')

        # Rules that only apply while Polymyxin B is not susceptible.
        if sus >= 0 and pb_sus == 0:
            if 3 <= res < 7:
                return('Possible MDR')
            if 7 <= res < 9:
                return('Possible XDR')

        # Special case: Polymyxin B susceptible alongside any resistance.
        if sus >= 1 and res > 0 and pb_sus == 1:
            return('Possible XDR')

        # All nine categories resistant: XDR if anything is still
        # susceptible, TDR if nothing is.
        if res == 9:
            if sus > 0:
                return('Possible XDR')
            if sus == 0:
                return('Possible TDR')

    # Nothing matched: report the intermediate and NA counts for context.
    return('Strain could not be classified (' + str(intm) + ' | ' + str(na) + ')')
| 59 | |
| 60 #print(s_phen(1,9,0,0,0)) | |
| 61 | |
| 62 | |
| 63 # In[178]: | |
| 64 | |
| 65 | |
# Antibiotic groups, following the antibiotics of the CLSI MIC breakpoints.
# Names are written the way the input headers look after normalisation
# (first letter upper-case, the rest lower-case, no whitespace).
# Three names were historically misspelled here ('Gentamycin', 'Meropenam',
# 'Doxicycline'); the standard spellings are accepted as well so that such
# columns are no longer silently ignored, while the old spellings keep
# matching existing inputs.

# Aminoglycosides
cat1 = ['Amikacin', 'Tobramycin', 'Gentamycin', 'Gentamicin', 'Netilmicin']
# Beta-lactams: carbapenems
cat2 = ['Imipenem', 'Meropenam', 'Meropenem', 'Doripenem']
# Fluoroquinolones
cat3 = ['Ciprofloxacin', 'Levofloxacin']
# Beta-lactam / inhibitor combinations
cat4 = ['Piperacillin/tazobactam', 'Ticarcillin/clavulanicacid']
# Cephalosporins
cat5 = ['Cefotaxime', 'Ceftriaxone', 'Ceftazidime', 'Cefepime']
# Sulfonamides
cat6 = ['Trimethoprim/sulfamethoxazole']
# Penicillins / beta-lactamase inhibitor
cat7 = ['Ampicillin/sulbactam']
# Polymyxins
cat8 = ['Colistin', 'Polymyxinb']
# Tetracyclines
cat9 = ['Tetracycline', 'Doxicycline', 'Doxycycline', 'Minocycline']
| 85 | |
def s_profiler(pd_series):
    """Derive the phenotype label for one strain from its antibiotic calls.

    Parameters
    ----------
    pd_series : pandas.Series
        One row of the strain table. The index holds the column names and
        the values hold the calls ('Resistant', 'Susceptible',
        'Intermediate', 'NA', ...). Only labels listed in cat1..cat9 are
        looked at; calls are compared case-insensitively with surrounding
        whitespace ignored.

    Returns
    -------
    str
        The label returned by s_phen() for the tallied counts.

    Notes
    -----
    Counting is per category, not per antibiotic: a category adds at most
    one to each of the resistant / susceptible / intermediate / NA counters,
    however many of its antibiotics carry that call.
    """
    categories = (cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9)

    def normalised_calls(antibiotics):
        # Set of distinct calls found for the given antibiotic names.
        # str() keeps non-text cells (numbers, NaN) from raising on .strip();
        # such cells simply never match a known call.
        selected = pd_series.filter(items=antibiotics)
        return {str(call).strip().lower() for call in selected}

    # Polymyxin B is tracked on its own because s_phen() has a dedicated
    # rule for strains that are still susceptible to it.
    pb_sus = 1 if 'susceptible' in normalised_calls(['Polymyxinb']) else 0

    res = 0
    sus = 0
    intm = 0
    na = 0
    for antibiotics in categories:
        calls = normalised_calls(antibiotics)
        if 'resistant' in calls:
            res += 1
        if 'susceptible' in calls:
            sus += 1
        if 'intermediate' in calls:
            intm += 1
        if 'na' in calls:
            na += 1

    return s_phen(sus, res, intm, na, pb_sus)
| 132 | |
| 133 | |
| 134 # In[179]: | |
| 135 | |
| 136 | |
| 137 #input_file='input2.csv_table.csv' | |
| 138 #output_file=input_file+'_output.txt' | |
# Load the strain table. na_filter=False keeps cells such as "NA" as plain
# text instead of converting them to NaN, so they can be counted as calls.
strain_profile = pd.read_csv(input_file, sep=',', na_filter=False, skipinitialspace=True)

# Remember the first header exactly as supplied so it can be restored on output.
old_strain_name = strain_profile.columns[0]

# Normalise headers for comparison with the category lists: capitalise,
# trim, and remove all whitespace. The pattern is a raw string because
# '\s' in a plain literal is an invalid escape sequence.
strain_profile.columns = (
    strain_profile.columns.str.capitalize().str.strip().str.replace(r'\s+', '', regex=True)
)

# Read the normalised first header back from the frame so that it always
# matches the column being renamed below, whatever whitespace it contained.
new_strain_name = strain_profile.columns[0]

# Add the result column in second position, then fill it row by row.
strain_profile.insert(1, 'Strain phenotype', '')
strain_profile['Strain phenotype'] = strain_profile.apply(s_profiler, axis=1)

# Restore the original first header and the spaced display names of the
# combination antibiotics.
strain_profile = strain_profile.rename(columns={
    new_strain_name: old_strain_name,
    'Ticarcillin/clavulanicacid': 'Ticarcillin/ clavulanic acid',
    'Piperacillin/tazobactam': 'Piperacillin/ tazobactam',
    'Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole',
    'Ampicillin/sulbactam': 'Ampicillin/ sulbactam',
    'Polymyxinb': 'Polymyxin B',
})

strain_profile.to_csv(output_file, na_rep='NA', index=False)

# Append the explanatory legend after the table in the same output file.
# NOTE(review): the text has no opening quote before NA'; it is left
# unchanged here in case stored expected outputs are compared byte-for-byte.
with open(output_file, "a") as file_object:
    file_object.write(
        "Note: \n"
        "1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, "
        "'TDR':totally drug resistant, NA': Data Not Available.\n"
        "2. 'Strain could not be classified' numbers follow the format as "
        "('Number of antibiotics categories count as Intermediate' | "
        "'Number of antibiotics categories count as NA')"
    )
| 211 | |
| 212 | |
| 213 # In[ ]: | |
| 214 | |
| 215 | |
| 216 | |
| 217 |
