Mercurial > repos > rakesh4osdd > clsi_profile

diff asist_dynamic.ipynb @ 11:7dcc0e93288b draft default tip
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
author: rakesh4osdd
date: Wed, 30 Jun 2021 07:13:29 +0000
parents: 116ebdf92e39
--- a/asist_dynamic.ipynb	Wed Jun 30 06:59:29 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,312 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1309,
-   "id": "27cfc66f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# ASIST program: assigns a phenotype to each strain based on its antibiotics profile\n",
-    "# create a profile based on selected antibiotics only\n",
-    "# rakesh4osdd@gmail.com, 14-June-2021"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "75a352b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import sys\n",
-    "import os\n",
-    "from collections import Counter"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 162,
-   "id": "d66ec0d2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#input_file=sys.argv[1]\n",
-    "#output_file=sys.argv[2]\n",
-    "input_file='test-data/strains_788_input_16k.csv'\n",
-    "output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv'\n",
-    "#input_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15.csv'\n",
-    "#output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15_output.csv'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 163,
-   "id": "bf24c946",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# map a strain's category counts (susceptible, resistant, intermediate, NA) and its Polymyxin B flag to a phenotype label\n",
-    "def s_phen(sus,res,intm,na,pb_sus):\n",
-    "    if (sus>0 and res==0 and na>=0):\n",
-    "        #print('Possible Susceptible')\n",
-    "        phen='Possible Susceptible'\n",
-    "    elif (sus>=0 and 3<=res<7 and na>=0 and pb_sus==0):\n",
-    "        #print('Possible MDR')\n",
-    "        phen='Possible MDR'\n",
-    "    elif (sus>=0 and 7<=res<9 and na>=0 and pb_sus==0):\n",
-    "        #print('Possible XDR')\n",
-    "        phen='Possible XDR'\n",
-    "    #special cases\n",
-    "    elif (sus>=1 and res>0 and na>=0 and pb_sus==1):\n",
-    "        #print('Possible XDR')\n",
-    "        phen='Possible XDR'\n",
-    "    #special cases\n",
-    "    elif (sus>0 and res==9 and na>=0):\n",
-    "        #print('Possible XDR')\n",
-    "        phen='Possible XDR'\n",
-    "    elif (sus==0 and res==9 and na>=0):\n",
-    "        #print('Possible TDR')\n",
-    "        phen='Possible TDR'\n",
-    "    else:\n",
-    "        #print('Strain could not be classified')\n",
-    "        phen='Strain could not be classified ('+ str(intm)+' | ' + str(na) +')'\n",
-    "    return(phen)\n",
-    "\n",
-    "#print(s_phen(1,9,0,0))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 164,
-   "id": "8bad7d9d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# define antibiotic groups (categories) for the antibiotics covered by the CLSI MIC breakpoints\n",
-    "#Aminoglycoside\n",
-    "cat1=['Amikacin','Tobramycin','Gentamycin','Netilmicin']\n",
-    "#Beta-lactams- Carbapenems\n",
-    "cat2=['Imipenem','Meropenam','Doripenem']\n",
-    "#Fluoroquinolone\n",
-    "cat3=['Ciprofloxacin','Levofloxacin']\n",
-    "#Beta-lactam inhibitor\n",
-    "cat4=['Piperacillin/tazobactam','Ticarcillin/clavulanicacid']\n",
-    "#Cephalosporin\n",
-    "cat5=['Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime']\n",
-    "#Sulfonamides\n",
-    "cat6=['Trimethoprim/sulfamethoxazole']\n",
-    "#Penicillins/beta-lactamase\n",
-    "cat7=['Ampicillin/sulbactam']\n",
-    "#Polymyxins\n",
-    "cat8=['Colistin','Polymyxinb']\n",
-    "#Tetracycline\n",
-    "cat9=['Tetracycline','Doxicycline','Minocycline']\n",
-    "\n",
-    "def s_profiler(pd_series):\n",
-    "    #print(type(pd_series),'\\n', pd_series)\n",
-    "    #create a dictionary of dataframe series\n",
-    "    cats={'s1':cat1,'s2':cat2,'s3':cat3,'s4':cat4,'s5':cat5,'s6':cat6,'s7':cat7,'s8':cat8,'s9':cat9}\n",
-    "    # find the antibiotics name in input series\n",
-    "    for cat in cats:\n",
-    "        #print(cats[cat])\n",
-    "        cats[cat]=pd_series.filter(cats[cat])\n",
-    "        #print(cats[cat])\n",
-    "    #define res,sus,intm,na,pb_sus\n",
-    "    res=0\n",
-    "    sus=0\n",
-    "    intm=0\n",
-    "    na=0\n",
-    "    pb_sus=0\n",
-    "    # special case: set pb_sus when the 'Polymyxinb' value is 'susceptible'\n",
-    "    if 'Polymyxinb' in pd_series:\n",
-    "        ctp=cats['s8']['Polymyxinb'].strip().lower()\n",
-    "        if ctp == 'susceptible':\n",
-    "            pb_sus=1\n",
-    "        #print((ctp,p_sus))\n",
-    "    # check all categories\n",
-    "    for cat in cats:\n",
-    "        #ctp=cats['s8'].iloc[i:i+1].stack().value_counts().to_dict()\n",
-    "        #print(ctp)\n",
-    "        # count how often each value occurs in this category's pandas Series\n",
-    "        ct=cats[cat].value_counts().to_dict()\n",
-    "        #print(ct)\n",
-    "        # remove whitespace and convert to lowercase words\n",
-    "        ct =  {k.strip().lower(): v for k, v in ct.items()}\n",
-    "        #print(ct)\n",
-    "        k=Counter(ct)\n",
-    "        #j=Counter(ct)+Counter(j)\n",
-    "        #print(j)\n",
-    "        # category wise marking\n",
-    "        if k['resistant']>=1:\n",
-    "            res=res+1\n",
-    "        if k['susceptible']>=1:\n",
-    "            sus=sus+1\n",
-    "        if k['intermediate']>=1:\n",
-    "            intm=intm+1\n",
-    "        if k['na']>=1:\n",
-    "            na=na+1\n",
-    "    #print(sus,res,intm,na,pb_sus)\n",
-    "    #print(s_phen(sus,res,intm,na,pb_sus))\n",
-    "    return(s_phen(sus,res,intm,na,pb_sus))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 165,
-   "id": "7629fc10",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#input_file='input2.csv_table.csv'\n",
-    "#output_file=input_file+'_output.txt'\n",
-    "strain_profile=pd.read_csv(input_file, sep=',',na_filter=False,skipinitialspace = True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 166,
-   "id": "bed1abba",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "old_strain_name=strain_profile.columns[0]\n",
-    "new_strain_name=old_strain_name.capitalize().strip().replace(' ', '')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 167,
-   "id": "a64b5022",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# capitalize the headers and remove leading, trailing, and internal whitespace for comparison\n",
-    "strain_profile.columns=strain_profile.columns.str.capitalize().str.strip().str.replace('\\s+', '', regex=True)\n",
-    "#print(strain_profile.columns)\n",
-    "#strain_profile.head()\n",
-    "#strain_profile.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 168,
-   "id": "caac57d7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# add new column in dataframe on second position\n",
-    "strain_profile.insert(1, 'Strain phenotype','')\n",
-    "#strain_profile.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 169,
-   "id": "eb4b0c4d",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "strain_profile['Strain phenotype'] = strain_profile.apply(lambda x: (s_profiler(x)), axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 170,
-   "id": "86441c0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#strain_profile.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 171,
-   "id": "75698be5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# restore the original header names\n",
-    "strain_profile=strain_profile.rename(columns = {new_strain_name:old_strain_name, 'Ticarcillin/clavulanicacid':'Ticarcillin/ clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 172,
-   "id": "c14a13eb",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "#strain_profile.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 173,
-   "id": "1b113050",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#strain_profile"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 174,
-   "id": "5ab72211",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "strain_profile.to_csv(output_file,na_rep='NA',index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 175,
-   "id": "c17c84c4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Open a file with access mode 'a'\n",
-    "with open(output_file, \"a\") as file_object:\n",
-    "    # Append the explanatory note at the end of the file\n",
-    "    file_object.write(\"Note: \\n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, NA': Data Not Available.\\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7e8e1fa8",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
author	rakesh4osdd
date	Wed, 30 Jun 2021 07:13:29 +0000
parents	116ebdf92e39
children