Mercurial > repos > rakesh4osdd > clsi_profile
comparison asist_dynamic.ipynb @ 10:116ebdf92e39 draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit 84162f16b2170970978550561485bda556396f08"
author | rakesh4osdd |
---|---|
date | Wed, 30 Jun 2021 06:59:29 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:b3c01b790314 | 10:116ebdf92e39 |
---|---|
1 { | |
2 "cells": [ | |
3 { | |
4 "cell_type": "code", | |
5 "execution_count": 1309, | |
6 "id": "27cfc66f", | |
7 "metadata": {}, | |
8 "outputs": [], | |
9 "source": [ | |
10 "#ASIST program for phenotype based on Antibiotics profile\n", | |
11 "# create a profile based on selected antibiotics only\n", | |
12 "# rakesh4osdd@gmail.com, 14-June-2021" | |
13 ] | |
14 }, | |
15 { | |
16 "cell_type": "code", | |
17 "execution_count": 1, | |
18 "id": "75a352b7", | |
19 "metadata": {}, | |
20 "outputs": [], | |
21 "source": [ | |
22 "import pandas as pd\n", | |
23 "import sys\n", | |
24 "import os\n", | |
25 "from collections import Counter" | |
26 ] | |
27 }, | |
28 { | |
29 "cell_type": "code", | |
30 "execution_count": 162, | |
31 "id": "d66ec0d2", | |
32 "metadata": {}, | |
33 "outputs": [], | |
34 "source": [ | |
35 "#input_file=sys.argv[1]\n", | |
36 "#output_file=sys.argv[2]\n", | |
37 "input_file='test-data/strains_788_input_16k.csv'\n", | |
38 "output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv'\n", | |
39 "#input_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15.csv'\n", | |
40 "#output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15_output.csv'" | |
41 ] | |
42 }, | |
43 { | |
44 "cell_type": "code", | |
45 "execution_count": 163, | |
46 "id": "bf24c946", | |
47 "metadata": {}, | |
48 "outputs": [], | |
49 "source": [ | |
50 "# map a strain's per-category counts (susceptible, resistant, intermediate, NA) to a phenotype label\n", | |
51 "def s_phen(sus,res,intm,na,pb_sus):\n", | |
52 " if (sus>0 and res==0 and na>=0):\n", | |
53 " #print('Possible Susceptible')\n", | |
54 " phen='Possible Susceptible'\n", | |
55 " elif (sus>=0 and 3<=res<7 and na>=0 and pb_sus==0):\n", | |
56 " #print('Possible MDR')\n", | |
57 " phen='Possible MDR'\n", | |
58 " elif (sus>=0 and 7<=res<9 and na>=0 and pb_sus==0):\n", | |
59 " #print('Possible XDR')\n", | |
60 " phen='Possible XDR'\n", | |
61 " #special cases\n", | |
62 " elif (sus>=1 and res>0 and na>=0 and pb_sus==1):\n", | |
63 " #print('Possible XDR')\n", | |
64 " phen='Possible XDR'\n", | |
65 " #special cases\n", | |
66 " elif (sus>0 and res==9 and na>=0):\n", | |
67 " #print('Possible XDR')\n", | |
68 " phen='Possible XDR'\n", | |
69 " elif (sus==0 and res==9 and na>=0):\n", | |
70 " #print('Possible TDR')\n", | |
71 " phen='Possible TDR'\n", | |
72 " else:\n", | |
73 " #print('Strain could not be classified')\n", | |
74 " phen='Strain could not be classified ('+ str(intm)+' | ' + str(na) +')'\n", | |
75 " return(phen)\n", | |
76 "\n", | |
77 "#print(s_phen(1,9,0,0))" | |
78 ] | |
79 }, | |
80 { | |
81 "cell_type": "code", | |
82 "execution_count": 164, | |
83 "id": "8bad7d9d", | |
84 "metadata": {}, | |
85 "outputs": [], | |
86 "source": [ | |
87 "# define antibiotic groups according to the antibiotics listed in the CLSI MIC breakpoints\n", | |
88 "#Aminoglycoside\n", | |
89 "cat1=['Amikacin','Tobramycin','Gentamycin','Netilmicin']\n", | |
90 "#Beta-lactams- Carbapenems\n", | |
91 "cat2=['Imipenem','Meropenam','Doripenem']\n", | |
92 "#Fluoroquinolone\n", | |
93 "cat3=['Ciprofloxacin','Levofloxacin']\n", | |
94 "#Beta-lactam inhibitor\n", | |
95 "cat4=['Piperacillin/tazobactam','Ticarcillin/clavulanicacid']\n", | |
96 "#Cephalosporin\n", | |
97 "cat5=['Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime']\n", | |
98 "#Sulfonamides\n", | |
99 "cat6=['Trimethoprim/sulfamethoxazole']\n", | |
100 "#Penicillins/beta-lactamase\n", | |
101 "cat7=['Ampicillin/sulbactam']\n", | |
102 "#Polymyxins\n", | |
103 "cat8=['Colistin','Polymyxinb']\n", | |
104 "#Tetracycline\n", | |
105 "cat9=['Tetracycline','Doxicycline','Minocycline']\n", | |
106 "\n", | |
107 "def s_profiler(pd_series):\n", | |
108 " #print(type(pd_series),'\\n', pd_series)\n", | |
109 " # map each category key (s1..s9) to its list of antibiotic names\n", | |
110 " cats={'s1':cat1,'s2':cat2,'s3':cat3,'s4':cat4,'s5':cat5,'s6':cat6,'s7':cat7,'s8':cat8,'s9':cat9}\n", | |
111 " # keep, for each category, only the entries of the input series that belong to it\n", | |
112 " for cat in cats:\n", | |
113 " #print(cats[cat])\n", | |
114 " cats[cat]=pd_series.filter(cats[cat])\n", | |
115 " #print(cats[cat])\n", | |
116 " #define res,sus,intm,na,pb_sus\n", | |
117 " res=0\n", | |
118 " sus=0\n", | |
119 " intm=0\n", | |
120 " na=0\n", | |
121 " pb_sus=0\n", | |
122 " # special case: flag whether 'Polymyxin b' itself is reported as susceptible\n", | |
123 " if 'Polymyxinb' in pd_series:\n", | |
124 " ctp=cats['s8']['Polymyxinb'].strip().lower()\n", | |
125 " if ctp == 'susceptible':\n", | |
126 " pb_sus=1\n", | |
127 " #print((ctp,p_sus))\n", | |
128 " # check all categories\n", | |
129 " for cat in cats:\n", | |
130 " #ctp=cats['s8'].iloc[i:i+1].stack().value_counts().to_dict()\n", | |
131 " #print(ctp)\n", | |
132 " # Pandas series\n", | |
133 " ct=cats[cat].value_counts().to_dict()\n", | |
134 " #print(ct)\n", | |
135 " # strip leading/trailing whitespace from each value and convert it to lowercase\n", | |
136 " ct = {k.strip().lower(): v for k, v in ct.items()}\n", | |
137 " #print(ct)\n", | |
138 " k=Counter(ct)\n", | |
139 " #j=Counter(ct)+Counter(j)\n", | |
140 " #print(j)\n", | |
141 " # category-wise marking: each category adds at most one to each status count\n", | |
142 " if k['resistant']>=1:\n", | |
143 " res=res+1\n", | |
144 " if k['susceptible']>=1:\n", | |
145 " sus=sus+1\n", | |
146 " if k['intermediate']>=1:\n", | |
147 " intm=intm+1\n", | |
148 " if k['na']>=1:\n", | |
149 " na=na+1\n", | |
150 " #print(sus,res,intm,na,pb_sus)\n", | |
151 " #print(s_phen(sus,res,intm,na,pb_sus))\n", | |
152 " return(s_phen(sus,res,intm,na,pb_sus))" | |
153 ] | |
154 }, | |
155 { | |
156 "cell_type": "code", | |
157 "execution_count": 165, | |
158 "id": "7629fc10", | |
159 "metadata": {}, | |
160 "outputs": [], | |
161 "source": [ | |
162 "#input_file='input2.csv_table.csv'\n", | |
163 "#output_file=input_file+'_output.txt'\n", | |
164 "strain_profile=pd.read_csv(input_file, sep=',',na_filter=False,skipinitialspace = True)" | |
165 ] | |
166 }, | |
167 { | |
168 "cell_type": "code", | |
169 "execution_count": 166, | |
170 "id": "bed1abba", | |
171 "metadata": {}, | |
172 "outputs": [], | |
173 "source": [ | |
174 "old_strain_name=strain_profile.columns[0]\n", | |
175 "new_strain_name=old_strain_name.capitalize().strip().replace(' ', '')" | |
176 ] | |
177 }, | |
178 { | |
179 "cell_type": "code", | |
180 "execution_count": 167, | |
181 "id": "a64b5022", | |
182 "metadata": {}, | |
183 "outputs": [], | |
184 "source": [ | |
185 "# normalize headers for comparison: capitalize them and remove all whitespace (leading, trailing, and internal)\n", | |
186 "strain_profile.columns=strain_profile.columns.str.capitalize().str.strip().str.replace('\\s+', '', regex=True)\n", | |
187 "#print(strain_profile.columns)\n", | |
188 "#strain_profile.head()\n", | |
189 "#strain_profile.columns" | |
190 ] | |
191 }, | |
192 { | |
193 "cell_type": "code", | |
194 "execution_count": 168, | |
195 "id": "caac57d7", | |
196 "metadata": {}, | |
197 "outputs": [], | |
198 "source": [ | |
199 "# add new column in dataframe on second position\n", | |
200 "strain_profile.insert(1, 'Strain phenotype','')\n", | |
201 "#strain_profile.head()" | |
202 ] | |
203 }, | |
204 { | |
205 "cell_type": "code", | |
206 "execution_count": 169, | |
207 "id": "eb4b0c4d", | |
208 "metadata": { | |
209 "scrolled": true | |
210 }, | |
211 "outputs": [], | |
212 "source": [ | |
213 "strain_profile['Strain phenotype'] = strain_profile.apply(lambda x: (s_profiler(x)), axis=1)" | |
214 ] | |
215 }, | |
216 { | |
217 "cell_type": "code", | |
218 "execution_count": 170, | |
219 "id": "86441c0f", | |
220 "metadata": {}, | |
221 "outputs": [], | |
222 "source": [ | |
223 "#strain_profile.head()" | |
224 ] | |
225 }, | |
226 { | |
227 "cell_type": "code", | |
228 "execution_count": 171, | |
229 "id": "75698be5", | |
230 "metadata": {}, | |
231 "outputs": [], | |
232 "source": [ | |
233 "# rename the normalized headers: restore the first column's original name and the spaced antibiotic names\n", | |
234 "strain_profile=strain_profile.rename(columns = {new_strain_name:old_strain_name, 'Ticarcillin/clavulanicacid':'Ticarcillin/ clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" | |
235 ] | |
236 }, | |
237 { | |
238 "cell_type": "code", | |
239 "execution_count": 172, | |
240 "id": "c14a13eb", | |
241 "metadata": { | |
242 "scrolled": true | |
243 }, | |
244 "outputs": [], | |
245 "source": [ | |
246 "#strain_profile.columns" | |
247 ] | |
248 }, | |
249 { | |
250 "cell_type": "code", | |
251 "execution_count": 173, | |
252 "id": "1b113050", | |
253 "metadata": {}, | |
254 "outputs": [], | |
255 "source": [ | |
256 "#strain_profile" | |
257 ] | |
258 }, | |
259 { | |
260 "cell_type": "code", | |
261 "execution_count": 174, | |
262 "id": "5ab72211", | |
263 "metadata": {}, | |
264 "outputs": [], | |
265 "source": [ | |
266 "strain_profile.to_csv(output_file,na_rep='NA',index=False)" | |
267 ] | |
268 }, | |
269 { | |
270 "cell_type": "code", | |
271 "execution_count": 175, | |
272 "id": "c17c84c4", | |
273 "metadata": {}, | |
274 "outputs": [], | |
275 "source": [ | |
276 "# Reopen the output file in append mode ('a')\n", | |
277 "with open(output_file, \"a\") as file_object:\n", | |
278 " # Append the explanatory notes at the end of the file\n", | |
279 " file_object.write(\"Note: \\n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, NA': Data Not Available.\\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')\")" | |
280 ] | |
281 }, | |
282 { | |
283 "cell_type": "code", | |
284 "execution_count": null, | |
285 "id": "7e8e1fa8", | |
286 "metadata": {}, | |
287 "outputs": [], | |
288 "source": [] | |
289 } | |
290 ], | |
291 "metadata": { | |
292 "kernelspec": { | |
293 "display_name": "Python 3", | |
294 "language": "python", | |
295 "name": "python3" | |
296 }, | |
297 "language_info": { | |
298 "codemirror_mode": { | |
299 "name": "ipython", | |
300 "version": 3 | |
301 }, | |
302 "file_extension": ".py", | |
303 "mimetype": "text/x-python", | |
304 "name": "python", | |
305 "nbconvert_exporter": "python", | |
306 "pygments_lexer": "ipython3", | |
307 "version": "3.7.10" | |
308 } | |
309 }, | |
310 "nbformat": 4, | |
311 "nbformat_minor": 5 | |
312 } |