comparison asist_dynamic.ipynb @ 10:116ebdf92e39 draft

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit 84162f16b2170970978550561485bda556396f08"
author rakesh4osdd
date Wed, 30 Jun 2021 06:59:29 +0000
parents
children
comparison
equal deleted inserted replaced
9:b3c01b790314 10:116ebdf92e39
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1309,
6 "id": "27cfc66f",
7 "metadata": {},
8 "outputs": [],
9 "source": [
10 "#ASIST program for phenotype based on Antibiotics profile\n",
11 "# create a profile based on selected antibiotics only\n",
12 "# rakesh4osdd@gmail.com, 14-June-2021"
13 ]
14 },
15 {
16 "cell_type": "code",
17 "execution_count": 1,
18 "id": "75a352b7",
19 "metadata": {},
20 "outputs": [],
21 "source": [
22 "import pandas as pd\n",
23 "import sys\n",
24 "import os\n",
25 "from collections import Counter"
26 ]
27 },
28 {
29 "cell_type": "code",
30 "execution_count": 162,
31 "id": "d66ec0d2",
32 "metadata": {},
33 "outputs": [],
34 "source": [
def resolve_io_paths(argv, default_input, default_output):
    """Choose the input and output CSV paths.

    Parameters
    ----------
    argv : sequence of str
        Command line in ``sys.argv`` layout (program name first).
    default_input, default_output : str
        Paths used when ``argv`` does not carry exactly two positional values.

    Returns
    -------
    tuple of (str, str)
        ``(input_path, output_path)``.
    """
    cli_args = list(argv[1:])
    # Exactly two positional arguments are read as "<input> <output>".
    # Anything that looks like an option (e.g. the "-f <connection file>" pair
    # a Jupyter kernel is started with) is ignored so that the notebook keeps
    # using its built-in defaults when run interactively.
    if len(cli_args) == 2 and not any(arg.startswith('-') for arg in cli_args):
        return cli_args[0], cli_args[1]
    return default_input, default_output


# Defaults are the paths the notebook was developed against; pass
# "<input.csv> <output.csv>" on the command line to override them.
input_file, output_file = resolve_io_paths(
    sys.argv,
    'test-data/strains_788_input_16k.csv',
    '/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv',
)
41 ]
42 },
43 {
44 "cell_type": "code",
45 "execution_count": 163,
46 "id": "bf24c946",
47 "metadata": {},
48 "outputs": [],
49 "source": [
# Strain profile -> phenotype decision table.
def s_phen(sus, res, intm, na, pb_sus):
    """Translate per-category counts for one strain into a phenotype label.

    Parameters
    ----------
    sus, res, intm, na : int
        Number of antibiotic categories in which at least one drug was
        reported as susceptible / resistant / intermediate / NA.  The counts
        are expected to be non-negative.
    pb_sus : int
        1 when Polymyxin B itself was reported susceptible, otherwise 0.

    Returns
    -------
    str
        One of 'Possible Susceptible', 'Possible MDR', 'Possible XDR',
        'Possible TDR', or 'Strain could not be classified (<intm> | <na>)'.

    The rules are evaluated top to bottom and the first match wins, so their
    order is significant.  ``intm`` and ``na`` never influence the decision;
    they are only echoed in the "could not be classified" message.
    """
    # At least one susceptible category and no resistant category at all.
    if sus > 0 and res == 0:
        return 'Possible Susceptible'
    # Polymyxin B not susceptible: the label follows the resistant count.
    if 3 <= res < 7 and pb_sus == 0:
        return 'Possible MDR'
    if 7 <= res < 9 and pb_sus == 0:
        return 'Possible XDR'
    # Special case: Polymyxin B susceptible while some category is resistant.
    if sus >= 1 and res > 0 and pb_sus == 1:
        return 'Possible XDR'
    # Special case: nine resistant categories, yet something is still susceptible.
    if sus > 0 and res == 9:
        return 'Possible XDR'
    # Nine resistant categories and nothing susceptible.
    if sus == 0 and res == 9:
        return 'Possible TDR'
    # Everything else (e.g. only one or two resistant categories).
    return 'Strain could not be classified (' + str(intm) + ' | ' + str(na) + ')'

# Example: s_phen(1, 9, 0, 0, 0) -> 'Possible XDR'
78 ]
79 },
80 {
81 "cell_type": "code",
82 "execution_count": 164,
83 "id": "8bad7d9d",
84 "metadata": {},
85 "outputs": [],
86 "source": [
# Antibiotic groups, following the antibiotics of the CLSI MIC breakpoints.
# Names are written the way the input headers look after normalisation
# (first letter capitalised, all whitespace removed), because they are used
# verbatim to select columns.  Where the historical spelling used by this tool
# differs from the usual drug name, both spellings are listed so that either
# header is recognised instead of being silently ignored.
# Aminoglycosides
cat1 = ['Amikacin', 'Tobramycin', 'Gentamycin', 'Gentamicin', 'Netilmicin']
# Beta-lactams - Carbapenems
cat2 = ['Imipenem', 'Meropenam', 'Meropenem', 'Doripenem']
# Fluoroquinolones
cat3 = ['Ciprofloxacin', 'Levofloxacin']
# Beta-lactam / beta-lactamase inhibitor combinations
cat4 = ['Piperacillin/tazobactam', 'Ticarcillin/clavulanicacid']
# Cephalosporins
cat5 = ['Cefotaxime', 'Ceftriaxone', 'Ceftazidime', 'Cefepime']
# Sulfonamides
cat6 = ['Trimethoprim/sulfamethoxazole']
# Penicillins / beta-lactamase inhibitor
cat7 = ['Ampicillin/sulbactam']
# Polymyxins
cat8 = ['Colistin', 'Polymyxinb']
# Tetracyclines
cat9 = ['Tetracycline', 'Doxicycline', 'Doxycycline', 'Minocycline']
106 "\n",
def s_profiler(pd_series):
    """Classify one strain from its antibiotic susceptibility calls.

    Parameters
    ----------
    pd_series : pandas.Series
        One row of the strain table; the index holds the (normalised) column
        names and the values hold the reported call for each antibiotic.
        Labels that are not listed in ``cat1`` .. ``cat9`` are ignored.

    Returns
    -------
    str
        The phenotype label produced by ``s_phen``.

    Counting is per category, not per drug: a category adds one to ``res``
    if at least one of its drugs is 'resistant', and likewise for
    'susceptible', 'intermediate' and 'na'.  A single category can therefore
    contribute to several counters at once.
    """
    def normalise(cell):
        # str() first, so numeric or missing cells cannot break .strip();
        # such cells simply match none of the four recognised words.
        return str(cell).strip().lower()

    res = sus = intm = na = 0
    for drug_names in (cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9):
        # Only the drugs of this category that are present in the row.
        calls = {normalise(cell) for cell in pd_series.filter(drug_names)}
        if 'resistant' in calls:
            res += 1
        if 'susceptible' in calls:
            sus += 1
        if 'intermediate' in calls:
            intm += 1
        if 'na' in calls:
            na += 1

    # Polymyxin B is tracked on its own: s_phen treats a strain that is still
    # susceptible to it as a special case.
    pb_sus = 0
    if 'Polymyxinb' in pd_series:
        if normalise(pd_series['Polymyxinb']) == 'susceptible':
            pb_sus = 1

    return s_phen(sus, res, intm, na, pb_sus)
153 ]
154 },
155 {
156 "cell_type": "code",
157 "execution_count": 165,
158 "id": "7629fc10",
159 "metadata": {},
160 "outputs": [],
161 "source": [
# Load the strain table.  na_filter=False keeps cells such as 'NA' as literal
# text instead of converting them to missing values, and skipinitialspace=True
# drops blanks that follow a comma.
strain_profile = pd.read_csv(
    input_file,
    sep=',',
    na_filter=False,
    skipinitialspace=True,
)
165 ]
166 },
167 {
168 "cell_type": "code",
169 "execution_count": 166,
170 "id": "bed1abba",
171 "metadata": {},
172 "outputs": [],
173 "source": [
# Remember the first column's header exactly as it appears in the input so it
# can be restored before the result is written.
old_strain_name = strain_profile.columns[0]
# The same header after the normalisation applied to every column name:
# capitalise, trim, then drop ALL whitespace.  split()/join removes tabs and
# other whitespace too, not just ' ', so this always equals the normalised
# column label and the later rename back to the original header cannot miss.
new_strain_name = ''.join(old_strain_name.capitalize().strip().split())
176 ]
177 },
178 {
179 "cell_type": "code",
180 "execution_count": 167,
181 "id": "a64b5022",
182 "metadata": {},
183 "outputs": [],
184 "source": [
# Normalise the headers for comparison with the antibiotic lists: capitalise
# (first letter upper-case, the rest lower-case), trim leading/trailing blanks,
# then remove every remaining run of whitespace.
# The pattern is a raw string: '\s' in a plain string literal is an invalid
# escape sequence and triggers a warning on current Python versions.
strain_profile.columns = (
    strain_profile.columns
    .str.capitalize()
    .str.strip()
    .str.replace(r'\s+', '', regex=True)
)
190 ]
191 },
192 {
193 "cell_type": "code",
194 "execution_count": 168,
195 "id": "caac57d7",
196 "metadata": {},
197 "outputs": [],
198 "source": [
# Add an empty 'Strain phenotype' column in second position (index 1).
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run without restarting the kernel.
if 'Strain phenotype' not in strain_profile.columns:
    strain_profile.insert(1, 'Strain phenotype', '')
202 ]
203 },
204 {
205 "cell_type": "code",
206 "execution_count": 169,
207 "id": "eb4b0c4d",
208 "metadata": {
209 "scrolled": true
210 },
211 "outputs": [],
212 "source": [
# Classify every strain: with axis=1 each row is passed to s_profiler as a
# Series, and the returned label is stored in the 'Strain phenotype' column.
strain_profile['Strain phenotype'] = strain_profile.apply(s_profiler, axis=1)
214 ]
215 },
216 {
217 "cell_type": "code",
218 "execution_count": 170,
219 "id": "86441c0f",
220 "metadata": {},
221 "outputs": [],
222 "source": [
223 "#strain_profile.head()"
224 ]
225 },
226 {
227 "cell_type": "code",
228 "execution_count": 171,
229 "id": "75698be5",
230 "metadata": {},
231 "outputs": [],
232 "source": [
# Restore human-readable headers for the output: the first column gets its
# original name back and the whitespace-stripped combination-drug names get
# their spaced spelling.  Labels missing from the frame are left untouched.
strain_profile = strain_profile.rename(
    columns={
        new_strain_name: old_strain_name,
        'Ticarcillin/clavulanicacid': 'Ticarcillin/ clavulanic acid',
        'Piperacillin/tazobactam': 'Piperacillin/ tazobactam',
        'Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole',
        'Ampicillin/sulbactam': 'Ampicillin/ sulbactam',
        'Polymyxinb': 'Polymyxin B',
    }
)
235 ]
236 },
237 {
238 "cell_type": "code",
239 "execution_count": 172,
240 "id": "c14a13eb",
241 "metadata": {
242 "scrolled": true
243 },
244 "outputs": [],
245 "source": [
246 "#strain_profile.columns"
247 ]
248 },
249 {
250 "cell_type": "code",
251 "execution_count": 173,
252 "id": "1b113050",
253 "metadata": {},
254 "outputs": [],
255 "source": [
256 "#strain_profile"
257 ]
258 },
259 {
260 "cell_type": "code",
261 "execution_count": 174,
262 "id": "5ab72211",
263 "metadata": {},
264 "outputs": [],
265 "source": [
# Write the annotated table; missing values are written as 'NA' and the
# DataFrame index is left out of the file.
strain_profile.to_csv(
    output_file,
    na_rep='NA',
    index=False,
)
267 ]
268 },
269 {
270 "cell_type": "code",
271 "execution_count": 175,
272 "id": "c17c84c4",
273 "metadata": {},
274 "outputs": [],
275 "source": [
# Re-open the freshly written CSV in append mode ('a') and add an explanatory
# note after the table.  The note is free text, not CSV rows.
with open(output_file, "a") as file_object:
    file_object.write(
        "Note: \n"
        "1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, "
        # 'NA' previously lacked its opening quote in the emitted text.
        "'TDR':totally drug resistant, 'NA': Data Not Available.\n"
        "2. 'Strain could not be classified' numbers follow the format as "
        "('Number of antibiotics categories count as Intermediate' | "
        "'Number of antibiotics categories count as NA')"
    )
280 ]
281 },
282 {
283 "cell_type": "code",
284 "execution_count": null,
285 "id": "7e8e1fa8",
286 "metadata": {},
287 "outputs": [],
288 "source": []
289 }
290 ],
291 "metadata": {
292 "kernelspec": {
293 "display_name": "Python 3",
294 "language": "python",
295 "name": "python3"
296 },
297 "language_info": {
298 "codemirror_mode": {
299 "name": "ipython",
300 "version": 3
301 },
302 "file_extension": ".py",
303 "mimetype": "text/x-python",
304 "name": "python",
305 "nbconvert_exporter": "python",
306 "pygments_lexer": "ipython3",
307 "version": "3.7.10"
308 }
309 },
310 "nbformat": 4,
311 "nbformat_minor": 5
312 }