Mercurial > repos > rakesh4osdd > clsi_profile
comparison asist_dynamic.py @ 9:b3c01b790314 draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8"
author | rakesh4osdd |
---|---|
date | Wed, 30 Jun 2021 06:37:12 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:c89ee0059c70 | 9:b3c01b790314 |
---|---|
1 #!/usr/bin/env python | |
2 # coding: utf-8 | |
3 | |
4 # In[1309]: | |
5 | |
6 | |
7 # ASIST: assign a strain phenotype (Susceptible / MDR / XDR / TDR) from its antibiotic profile | |
8 # create a profile based on selected antibiotics only | |
9 # rakesh4osdd@gmail.com, 14-June-2021 | |
10 | |
11 | |
12 # In[1]: | |
13 | |
14 | |
15 import pandas as pd | |
16 import sys | |
17 import os | |
18 from collections import Counter | |
19 | |
20 | |
21 # In[176]: | |
22 | |
23 | |
# Command-line interface: <script> <input_csv> <output_csv>.
# Fail with a usage message instead of a bare IndexError traceback when an
# argument is missing.
if len(sys.argv) < 3:
    sys.exit('Usage: {} <input_csv> <output_csv>'.format(sys.argv[0]))
input_file = sys.argv[1]
output_file = sys.argv[2]
# Example paths for interactive runs:
#input_file='test-data/asist_input.csv'
#output_file='test-data/asist_output.csv'
28 | |
29 | |
30 # In[177]: | |
31 | |
32 | |
33 # strain_profile to phenotype condition | |
def s_phen(sus, res, intm, na, pb_sus):
    """Map per-category counts for one strain to a phenotype label.

    All counts are expected to be non-negative integers.

    Parameters:
        sus: number of antibiotic categories with a 'susceptible' call.
        res: number of antibiotic categories with a 'resistant' call.
        intm: number of categories with an 'intermediate' call; only reported
            in the "could not be classified" message.
        na: number of categories with an 'na' call; only reported in the
            "could not be classified" message.
        pb_sus: 1 when the strain is susceptible to Polymyxin B, else 0.

    Returns:
        One of 'Possible Susceptible', 'Possible MDR', 'Possible XDR',
        'Possible TDR', or 'Strain could not be classified (<intm> | <na>)'.
    """
    # Rules are checked in order; the first match wins.
    if sus > 0 and res == 0:
        return 'Possible Susceptible'
    if pb_sus == 0 and 3 <= res < 7:
        return 'Possible MDR'
    if pb_sus == 0 and 7 <= res < 9:
        return 'Possible XDR'
    # Special case: Polymyxin B susceptible with any resistance is XDR.
    if pb_sus == 1 and sus >= 1 and res > 0:
        return 'Possible XDR'
    # Special case: resistant in nine categories but still susceptible somewhere.
    if res == 9 and sus > 0:
        return 'Possible XDR'
    if res == 9 and sus == 0:
        return 'Possible TDR'
    return 'Strain could not be classified ({} | {})'.format(intm, na)
59 | |
60 #print(s_phen(1,9,0,0,0)) | |
61 | |
62 | |
63 # In[178]: | |
64 | |
65 | |
# Antibiotic groups as per the antibiotics of the CLSI MIC breakpoints.
# Names must match the input headers after normalisation (first letter
# upper-case, rest lower-case, all whitespace removed).
# The legacy spellings 'Gentamycin', 'Meropenam' and 'Doxicycline' are kept for
# existing inputs; the standard spellings are accepted as aliases so that such
# columns are no longer silently ignored.
# Aminoglycosides
cat1 = ['Amikacin', 'Tobramycin', 'Gentamycin', 'Gentamicin', 'Netilmicin']
# Beta-lactams: carbapenems
cat2 = ['Imipenem', 'Meropenam', 'Meropenem', 'Doripenem']
# Fluoroquinolones
cat3 = ['Ciprofloxacin', 'Levofloxacin']
# Beta-lactam inhibitors
cat4 = ['Piperacillin/tazobactam', 'Ticarcillin/clavulanicacid']
# Cephalosporins
cat5 = ['Cefotaxime', 'Ceftriaxone', 'Ceftazidime', 'Cefepime']
# Sulfonamides
cat6 = ['Trimethoprim/sulfamethoxazole']
# Penicillins/beta-lactamase
cat7 = ['Ampicillin/sulbactam']
# Polymyxins
cat8 = ['Colistin', 'Polymyxinb']
# Tetracyclines
cat9 = ['Tetracycline', 'Doxicycline', 'Doxycycline', 'Minocycline']
85 | |
def s_profiler(pd_series):
    """Count antibiotic calls per category for one strain and classify it.

    Parameters:
        pd_series: one row of the strain table as a pandas Series whose index
            holds the normalised antibiotic names used in cat1..cat9 and
            whose values are calls such as 'Resistant', 'Susceptible',
            'Intermediate' or 'NA' (case and surrounding blanks are ignored).

    Returns:
        The phenotype label produced by s_phen() for this strain.
    """
    def _call(value):
        # Cells may be padded, mixed-case or non-string (e.g. a numeric
        # column); coerce to str so normalisation never raises.
        return str(value).strip().lower()

    # Special case: Polymyxin B susceptibility is tracked on its own.
    pb_sus = 0
    if 'Polymyxinb' in pd_series and _call(pd_series['Polymyxinb']) == 'susceptible':
        pb_sus = 1

    res = sus = intm = na = 0
    for names in (cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9):
        # Keep only this category's antibiotics that are present in the row.
        calls = {_call(value) for value in pd_series.filter(names)}
        # Category-wise marking: each category counts at most once per call
        # type, however many of its antibiotics share that call.
        if 'resistant' in calls:
            res += 1
        if 'susceptible' in calls:
            sus += 1
        if 'intermediate' in calls:
            intm += 1
        if 'na' in calls:
            na += 1
    return s_phen(sus, res, intm, na, pb_sus)
132 | |
133 | |
134 # In[179]: | |
135 | |
136 | |
# Read the strain table. na_filter=False keeps cells such as 'NA' as literal
# strings instead of NaN, so s_profiler() can count them; skipinitialspace
# drops blanks that follow a delimiter.
strain_profile = pd.read_csv(input_file, sep=',', na_filter=False, skipinitialspace=True)

# Remember the first header exactly as supplied so it can be restored on output.
old_strain_name = strain_profile.columns[0]

# Normalise headers for comparison: first letter upper-case, the rest
# lower-case, and all whitespace (leading, trailing, internal) removed.
strain_profile.columns = (
    strain_profile.columns.str.capitalize().str.strip().str.replace(r'\s+', '', regex=True)
)

# Take the normalised first header from the frame itself so that the rename
# below always matches, whatever kind of whitespace the original contained.
new_strain_name = strain_profile.columns[0]

# Add the result column in second position, then fill it row by row.
strain_profile.insert(1, 'Strain phenotype', '')
strain_profile['Strain phenotype'] = strain_profile.apply(s_profiler, axis=1)

# Restore the original first header and the display names of the combination
# drugs whose spaces were stripped by the normalisation above.
strain_profile = strain_profile.rename(columns={
    new_strain_name: old_strain_name,
    'Ticarcillin/clavulanicacid': 'Ticarcillin/ clavulanic acid',
    'Piperacillin/tazobactam': 'Piperacillin/ tazobactam',
    'Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole',
    'Ampicillin/sulbactam': 'Ampicillin/ sulbactam',
    'Polymyxinb': 'Polymyxin B',
})

strain_profile.to_csv(output_file, na_rep='NA', index=False)

# Append the explanatory legend after the CSV rows (access mode 'a').
with open(output_file, "a") as file_object:
    file_object.write("Note: \n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, 'NA': Data Not Available.\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')")
211 | |
212 | |
213 # In[ ]: | |
214 | |
215 | |
216 | |
217 |