Mercurial > repos > rakesh4osdd > clsi_profile
comparison asist_dynamic.ipynb @ 11:7dcc0e93288b draft default tip
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
| author | rakesh4osdd |
|---|---|
| date | Wed, 30 Jun 2021 07:13:29 +0000 |
| parents | 116ebdf92e39 |
| children |
comparison
equal
deleted
inserted
replaced
| 10:116ebdf92e39 | 11:7dcc0e93288b |
|---|---|
| 1 { | |
| 2 "cells": [ | |
| 3 { | |
| 4 "cell_type": "code", | |
| 5 "execution_count": 1309, | |
| 6 "id": "27cfc66f", | |
| 7 "metadata": {}, | |
| 8 "outputs": [], | |
| 9 "source": [ | |
| 10 "#ASIST program for phenotype based on Antibiotics profile\n", | |
| 11 "# create a profile based on selected antibiotics only\n", | |
| 12 "# rakesh4osdd@gmail.com, 14-June-2021" | |
| 13 ] | |
| 14 }, | |
| 15 { | |
| 16 "cell_type": "code", | |
| 17 "execution_count": 1, | |
| 18 "id": "75a352b7", | |
| 19 "metadata": {}, | |
| 20 "outputs": [], | |
| 21 "source": [ | |
| 22 "import pandas as pd\n", | |
| 23 "import sys\n", | |
| 24 "import os\n", | |
| 25 "from collections import Counter" | |
| 26 ] | |
| 27 }, | |
| 28 { | |
| 29 "cell_type": "code", | |
| 30 "execution_count": 162, | |
| 31 "id": "d66ec0d2", | |
| 32 "metadata": {}, | |
| 33 "outputs": [], | |
| 34 "source": [ | |
| 35 "# Command-line alternative, currently disabled (presumably for script/Galaxy runs - TODO confirm):\n", | |
| 36 "#input_file=sys.argv[1]; output_file=sys.argv[2]\n", | |
| 37 "input_file='test-data/strains_788_input_16k.csv'\n", | |
| 38 "output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv'\n", | |
| 39 "# NOTE(review): output_file above is an absolute, machine-specific path. Alternative example data set:\n", | |
| 40 "#input_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15.csv'; output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15_output.csv'" | |
| 41 ] | |
| 42 }, | |
| 43 { | |
| 44 "cell_type": "code", | |
| 45 "execution_count": 163, | |
| 46 "id": "bf24c946", | |
| 47 "metadata": {}, | |
| 48 "outputs": [], | |
| 49 "source": [ | |
| 50 "# strain_profile to phenotype condition\n", | |
| 51 "def s_phen(sus,res,intm,na,pb_sus):\n", | |
| 52 "    \"\"\"Pick the phenotype label for one strain from its category tallies.\n", | |
| 53 "\n", | |
| 54 "    sus, res, intm, na -- tallies of antibiotic categories per call type;\n", | |
| 55 "    pb_sus -- 1 when the Polymyxin B call is susceptible, otherwise 0.\n", | |
| 56 "    Rules are tried from top to bottom and the first match decides.\n", | |
| 57 "    \"\"\"\n", | |
| 58 "    if (sus>0 and res==0 and na>=0):\n", | |
| 59 "        return 'Possible Susceptible'\n", | |
| 60 "    if (sus>=0 and 3<=res<7 and na>=0 and pb_sus==0):\n", | |
| 61 "        return 'Possible MDR'\n", | |
| 62 "    # three alternative rules lead to XDR; they keep their original order\n", | |
| 63 "    is_xdr=(\n", | |
| 64 "        (sus>=0 and 7<=res<9 and na>=0 and pb_sus==0)\n", | |
| 65 "        or (sus>=1 and res>0 and na>=0 and pb_sus==1)\n", | |
| 66 "        or (sus>0 and res==9 and na>=0)\n", | |
| 67 "    )\n", | |
| 68 "    if is_xdr:\n", | |
| 69 "        return 'Possible XDR'\n", | |
| 70 "    if (sus==0 and res==9 and na>=0):\n", | |
| 71 "        return 'Possible TDR'\n", | |
| 72 "    # nothing matched: report the intermediate and NA tallies in the label\n", | |
| 73 "    tallies=str(intm)+' | ' + str(na)\n", | |
| 74 "    label='Strain could not be classified ('+ tallies +')'\n", | |
| 75 "    return(label)\n", | |
| 76 "\n", | |
| 77 "#print(s_phen(1,9,0,0,0))" | |
| 78 ] | |
| 79 }, | |
| 80 { | |
| 81 "cell_type": "code", | |
| 82 "execution_count": 164, | |
| 83 "id": "8bad7d9d", | |
| 84 "metadata": {}, | |
| 85 "outputs": [], | |
| 86 "source": [ | |
| 87 "# Antibiotic groups (CLSI breakpoint MIC categories). Names must equal the normalised column headers (capitalised, whitespace removed); alternative spellings are listed side by side\n", | |
| 88 "#Aminoglycoside\n", | |
| 89 "cat1=['Amikacin','Tobramycin','Gentamycin','Gentamicin','Netilmicin']\n", | |
| 90 "#Beta-lactams- Carbapenems\n", | |
| 91 "cat2=['Imipenem','Meropenam','Meropenem','Doripenem']\n", | |
| 92 "#Fluoroquinolone\n", | |
| 93 "cat3=['Ciprofloxacin','Levofloxacin']\n", | |
| 94 "#Beta-lactam inhibitor\n", | |
| 95 "cat4=['Piperacillin/tazobactam','Ticarcillin/clavulanicacid']\n", | |
| 96 "#Cephalosporin\n", | |
| 97 "cat5=['Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime']\n", | |
| 98 "#Sulfonamides\n", | |
| 99 "cat6=['Trimethoprim/sulfamethoxazole']\n", | |
| 100 "#Penicillins/beta-lactamase\n", | |
| 101 "cat7=['Ampicillin/sulbactam']\n", | |
| 102 "#Polymyxins\n", | |
| 103 "cat8=['Colistin','Polymyxinb']\n", | |
| 104 "#Tetracycline\n", | |
| 105 "cat9=['Tetracycline','Doxicycline','Doxycycline','Minocycline']\n", | |
| 106 "\n", | |
| 107 "def s_profiler(pd_series):\n", | |
| 108 "    \"\"\"Classify one strain (one row of the profile table) into a phenotype.\n", | |
| 109 "\n", | |
| 110 "    The row's antibiotic columns are grouped into the categories cat1..cat9.\n", | |
| 111 "    A category adds one to a tally for each call type (resistant,\n", | |
| 112 "    susceptible, intermediate, na) found among its antibiotics, so one\n", | |
| 113 "    category can feed several tallies but adds at most one to each.\n", | |
| 114 "    Calls are compared after str(), strip() and lower(), so padding, letter\n", | |
| 115 "    case and non-string cells neither raise nor hide a call.\n", | |
| 116 "\n", | |
| 117 "    Parameters\n", | |
| 118 "    ----------\n", | |
| 119 "    pd_series : pandas.Series\n", | |
| 120 "        One row, indexed by the normalised column names.\n", | |
| 121 "\n", | |
| 122 "    Returns\n", | |
| 123 "    -------\n", | |
| 124 "    str\n", | |
| 125 "        The label returned by s_phen(sus, res, intm, na, pb_sus).\n", | |
| 126 "    \"\"\"\n", | |
| 127 "    groups=(cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9)\n", | |
| 128 "    # number of categories in which each call type was seen\n", | |
| 129 "    res=0\n", | |
| 130 "    sus=0\n", | |
| 131 "    intm=0\n", | |
| 132 "    na=0\n", | |
| 133 "    # Polymyxin B is checked on its own: a susceptible call sets pb_sus to 1\n", | |
| 134 "    pb_sus=0\n", | |
| 135 "    if 'Polymyxinb' in pd_series:\n", | |
| 136 "        # str() keeps a numeric or missing cell from breaking strip()\n", | |
| 137 "        if str(pd_series['Polymyxinb']).strip().lower() == 'susceptible':\n", | |
| 138 "            pb_sus=1\n", | |
| 139 "    for names in groups:\n", | |
| 140 "        # filter() keeps only this category's antibiotics present in the row\n", | |
| 141 "        present=pd_series.filter(names)\n", | |
| 142 "        # a set is enough: only the presence of a call matters, not its count\n", | |
| 143 "        calls={str(v).strip().lower() for v in present}\n", | |
| 144 "        if 'resistant' in calls:\n", | |
| 145 "            res=res+1\n", | |
| 146 "        if 'susceptible' in calls:\n", | |
| 147 "            sus=sus+1\n", | |
| 148 "        if 'intermediate' in calls:\n", | |
| 149 "            intm=intm+1\n", | |
| 150 "        if 'na' in calls:\n", | |
| 151 "            na=na+1\n", | |
| 152 "    return(s_phen(sus,res,intm,na,pb_sus))" | |
| 153 ] | |
| 154 }, | |
| 155 { | |
| 156 "cell_type": "code", | |
| 157 "execution_count": 165, | |
| 158 "id": "7629fc10", | |
| 159 "metadata": {}, | |
| 160 "outputs": [], | |
| 161 "source": [ | |
| 162 "# na_filter=False keeps cells such as 'NA' as plain text instead of NaN, so they can be\n", | |
| 163 "# matched as 'na' calls later; skipinitialspace drops blanks that follow a delimiter.\n", | |
| 164 "strain_profile=pd.read_csv(input_file, sep=',',na_filter=False,skipinitialspace = True)" | |
| 165 ] | |
| 166 }, | |
| 167 { | |
| 168 "cell_type": "code", | |
| 169 "execution_count": 166, | |
| 170 "id": "bed1abba", | |
| 171 "metadata": {}, | |
| 172 "outputs": [], | |
| 173 "source": [ | |
| 174 "old_strain_name=strain_profile.columns[0]  # original first header; the later rename puts it back\n", | |
| 175 "new_strain_name=''.join(old_strain_name.capitalize().strip().split())  # remove ALL whitespace, matching the '\\s+' header clean-up" | |
| 176 ] | |
| 177 }, | |
| 178 { | |
| 179 "cell_type": "code", | |
| 180 "execution_count": 167, | |
| 181 "id": "a64b5022", | |
| 182 "metadata": {}, | |
| 183 "outputs": [], | |
| 184 "source": [ | |
| 185 "# Normalise headers for comparison: capitalise, strip leading/trailing whitespace, remove inner whitespace\n", | |
| 186 "strain_profile.columns=strain_profile.columns.str.capitalize().str.strip().str.replace('\\s+', '', regex=True)\n", | |
| 187 "# capitalize() also lower-cases everything after the first letter, so a header\n", | |
| 188 "# such as 'Polymyxin B' becomes 'Polymyxinb', the spelling used in the cat lists.\n", | |
| 189 "#strain_profile.columns" | |
| 190 ] | |
| 191 }, | |
| 192 { | |
| 193 "cell_type": "code", | |
| 194 "execution_count": 168, | |
| 195 "id": "caac57d7", | |
| 196 "metadata": {}, | |
| 197 "outputs": [], | |
| 198 "source": [ | |
| 199 "# add the (empty) result column in second position; insert() raises ValueError if the column exists, so guard it to keep the cell re-runnable\n", | |
| 200 "if 'Strain phenotype' not in strain_profile.columns:\n", | |
| 201 "    strain_profile.insert(1, 'Strain phenotype','')" | |
| 202 ] | |
| 203 }, | |
| 204 { | |
| 205 "cell_type": "code", | |
| 206 "execution_count": 169, | |
| 207 "id": "eb4b0c4d", | |
| 208 "metadata": { | |
| 209 "scrolled": true | |
| 210 }, | |
| 211 "outputs": [], | |
| 212 "source": [ | |
| 213 "strain_profile['Strain phenotype'] = strain_profile.apply(s_profiler, axis=1)  # axis=1: s_profiler receives one row (strain) at a time" | |
| 214 ] | |
| 215 }, | |
| 216 { | |
| 217 "cell_type": "code", | |
| 218 "execution_count": 170, | |
| 219 "id": "86441c0f", | |
| 220 "metadata": {}, | |
| 221 "outputs": [], | |
| 222 "source": [ | |
| 223 "#strain_profile.head()" | |
| 224 ] | |
| 225 }, | |
| 226 { | |
| 227 "cell_type": "code", | |
| 228 "execution_count": 171, | |
| 229 "id": "75698be5", | |
| 230 "metadata": {}, | |
| 231 "outputs": [], | |
| 232 "source": [ | |
| 233 "# Undo the header normalisation for export: restore the original first-column header and the spaced spellings of the combined-drug and Polymyxin B columns\n", | |
| 234 "strain_profile=strain_profile.rename(columns = {new_strain_name:old_strain_name, 'Ticarcillin/clavulanicacid':'Ticarcillin/ clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" | |
| 235 ] | |
| 236 }, | |
| 237 { | |
| 238 "cell_type": "code", | |
| 239 "execution_count": 172, | |
| 240 "id": "c14a13eb", | |
| 241 "metadata": { | |
| 242 "scrolled": true | |
| 243 }, | |
| 244 "outputs": [], | |
| 245 "source": [ | |
| 246 "#strain_profile.columns" | |
| 247 ] | |
| 248 }, | |
| 249 { | |
| 250 "cell_type": "code", | |
| 251 "execution_count": 173, | |
| 252 "id": "1b113050", | |
| 253 "metadata": {}, | |
| 254 "outputs": [], | |
| 255 "source": [ | |
| 256 "#strain_profile" | |
| 257 ] | |
| 258 }, | |
| 259 { | |
| 260 "cell_type": "code", | |
| 261 "execution_count": 174, | |
| 262 "id": "5ab72211", | |
| 263 "metadata": {}, | |
| 264 "outputs": [], | |
| 265 "source": [ | |
| 266 "strain_profile.to_csv(output_file,na_rep='NA',index=False)  # no index column; missing values, if any, are written as 'NA'" | |
| 267 ] | |
| 268 }, | |
| 269 { | |
| 270 "cell_type": "code", | |
| 271 "execution_count": 175, | |
| 272 "id": "c17c84c4", | |
| 273 "metadata": {}, | |
| 274 "outputs": [], | |
| 275 "source": [ | |
| 276 "# Re-open the CSV in append mode ('a') so the rows written by to_csv are kept\n", | |
| 277 "with open(output_file, \"a\") as file_object:\n", | |
| 278 "    # legend for the abbreviations and for the 'could not be classified' counts\n", | |
| 279 "    file_object.write(\"Note: \\n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, 'NA': Data Not Available.\\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')\")" | |
| 280 ] | |
| 281 }, | |
| 282 { | |
| 283 "cell_type": "code", | |
| 284 "execution_count": null, | |
| 285 "id": "7e8e1fa8", | |
| 286 "metadata": {}, | |
| 287 "outputs": [], | |
| 288 "source": [] | |
| 289 } | |
| 290 ], | |
| 291 "metadata": { | |
| 292 "kernelspec": { | |
| 293 "display_name": "Python 3", | |
| 294 "language": "python", | |
| 295 "name": "python3" | |
| 296 }, | |
| 297 "language_info": { | |
| 298 "codemirror_mode": { | |
| 299 "name": "ipython", | |
| 300 "version": 3 | |
| 301 }, | |
| 302 "file_extension": ".py", | |
| 303 "mimetype": "text/x-python", | |
| 304 "name": "python", | |
| 305 "nbconvert_exporter": "python", | |
| 306 "pygments_lexer": "ipython3", | |
| 307 "version": "3.7.10" | |
| 308 } | |
| 309 }, | |
| 310 "nbformat": 4, | |
| 311 "nbformat_minor": 5 | |
| 312 } |
