Mercurial > repos > rakesh4osdd > clsi_profile
diff asist_dynamic.ipynb @ 11:7dcc0e93288b draft default tip
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
author | rakesh4osdd |
---|---|
date | Wed, 30 Jun 2021 07:13:29 +0000 |
parents | 116ebdf92e39 |
children |
line wrap: on
line diff
--- a/asist_dynamic.ipynb Wed Jun 30 06:59:29 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,312 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1309, - "id": "27cfc66f", - "metadata": {}, - "outputs": [], - "source": [ - "#ASIST program for phenotype based on Antibiotics profile\n", - "# create a profile based on selected antibiotics only\n", - "# rakesh4osdd@gmail.com, 14-June-2021" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "75a352b7", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import sys\n", - "import os\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 162, - "id": "d66ec0d2", - "metadata": {}, - "outputs": [], - "source": [ - "#input_file=sys.argv[1]\n", - "#output_file=sys.argv[2]\n", - "input_file='test-data/strains_788_input_16k.csv'\n", - "output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv'\n", - "#input_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15.csv'\n", - "#output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15_output.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": 163, - "id": "bf24c946", - "metadata": {}, - "outputs": [], - "source": [ - "# strain_profile to phenotype condition\n", - "def s_phen(sus,res,intm,na,pb_sus):\n", - " if (sus>0 and res==0 and na>=0):\n", - " #print('Possible Susceptible')\n", - " phen='Possible Susceptible'\n", - " elif (sus>=0 and 3<=res<7 and na>=0 and pb_sus==0):\n", - " #print('Possible MDR')\n", - " phen='Possible MDR'\n", - " elif (sus>=0 and 7<=res<9 and na>=0 and pb_sus==0):\n", - " #print('Possible XDR')\n", - " phen='Possible XDR'\n", - " #special cases\n", - " elif (sus>=1 and res>0 and na>=0 and pb_sus==1):\n", - " #print('Possible XDR')\n", - " phen='Possible XDR'\n", - " #special cases\n", - " elif (sus>0 and res==9 and na>=0):\n", - " #print('Possible XDR')\n", - " phen='Possible XDR'\n", - " elif (sus==0 and res==9 and na>=0):\n", - " #print('Possible TDR')\n", - " phen='Possible TDR'\n", - " else:\n", - " #print('Strain could not be classified')\n", - " phen='Strain could not be classified ('+ str(intm)+' | ' + str(na) +')'\n", - " return(phen)\n", - "\n", - "#print(s_phen(1,9,0,0))" - ] - }, - { - "cell_type": "code", - "execution_count": 164, - "id": "8bad7d9d", - "metadata": {}, - "outputs": [], - "source": [ - "# define Antibiotic groups as per antibiotic of CLSI breakpoints MIC\n", - "#Aminoglycoside\n", - "cat1=['Amikacin','Tobramycin','Gentamycin','Netilmicin']\n", - "#Beta-lactams- Carbapenems\n", - "cat2=['Imipenem','Meropenam','Doripenem']\n", - "#Fluoroquinolone\n", - "cat3=['Ciprofloxacin','Levofloxacin']\n", - "#Beta-lactam inhibitor\n", - "cat4=['Piperacillin/tazobactam','Ticarcillin/clavulanicacid']\n", - "#Cephalosporin\n", - "cat5=['Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime']\n", - "#Sulfonamides\n", - "cat6=['Trimethoprim/sulfamethoxazole']\n", - "#Penicillins/beta-lactamase\n", - "cat7=['Ampicillin/sulbactam']\n", - "#Polymyxins\n", - "cat8=['Colistin','Polymyxinb']\n", - "#Tetracycline\n", - "cat9=['Tetracycline','Doxicycline','Minocycline']\n", - "\n", - "def s_profiler(pd_series):\n", - " #print(type(pd_series),'\\n', pd_series)\n", - " #create a dictionary of dataframe series\n", - " cats={'s1':cat1,'s2':cat2,'s3':cat3,'s4':cat4,'s5':cat5,'s6':cat6,'s7':cat7,'s8':cat8,'s9':cat9}\n", - " # find the antibiotics name in input series\n", - " for cat in cats:\n", - " #print(cats[cat])\n", - " cats[cat]=pd_series.filter(cats[cat])\n", - " #print(cats[cat])\n", - " #define res,sus,intm,na,pb_sus\n", - " res=0\n", - " sus=0\n", - " intm=0\n", - " na=0\n", - " pb_sus=0\n", - " # special case of 'Polymyxin b' for its value\n", - " if 'Polymyxinb' in pd_series:\n", - " ctp=cats['s8']['Polymyxinb'].strip().lower()\n", - " if ctp == 'susceptible':\n", - " pb_sus=1\n", - " #print((ctp,p_sus))\n", - " # check all categories\n", - " for cat in cats:\n", - " #ctp=cats['s8'].iloc[i:i+1].stack().value_counts().to_dict()\n", - " #print(ctp)\n", - " # Pandas series\n", - " ct=cats[cat].value_counts().to_dict()\n", - " #print(ct)\n", - " # remove whitespace and convert to lowercase words\n", - " ct = {k.strip().lower(): v for k, v in ct.items()}\n", - " #print(ct)\n", - " k=Counter(ct)\n", - " #j=Counter(ct)+Counter(j)\n", - " #print(j)\n", - " # category wise marking\n", - " if k['resistant']>=1:\n", - " res=res+1\n", - " if k['susceptible']>=1:\n", - " sus=sus+1\n", - " if k['intermediate']>=1:\n", - " intm=intm+1\n", - " if k['na']>=1:\n", - " na=na+1\n", - " #print(sus,res,intm,na,pb_sus)\n", - " #print(s_phen(sus,res,intm,na,pb_sus))\n", - " return(s_phen(sus,res,intm,na,pb_sus))" - ] - }, - { - "cell_type": "code", - "execution_count": 165, - "id": "7629fc10", - "metadata": {}, - "outputs": [], - "source": [ - "#input_file='input2.csv_table.csv'\n", - "#output_file=input_file+'_output.txt'\n", - "strain_profile=pd.read_csv(input_file, sep=',',na_filter=False,skipinitialspace = True)" - ] - }, - { - "cell_type": "code", - "execution_count": 166, - "id": "bed1abba", - "metadata": {}, - "outputs": [], - "source": [ - "old_strain_name=strain_profile.columns[0]\n", - "new_strain_name=old_strain_name.capitalize().strip().replace(' ', '')" - ] - }, - { - "cell_type": "code", - "execution_count": 167, - "id": "a64b5022", - "metadata": {}, - "outputs": [], - "source": [ - "# make header capitalization, remove leading,lagging, and multiple whitespace for comparision\n", - "strain_profile.columns=strain_profile.columns.str.capitalize().str.strip().str.replace('\\s+', '', regex=True)\n", - "#print(strain_profile.columns)\n", - "#strain_profile.head()\n", - "#strain_profile.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 168, - "id": "caac57d7", - "metadata": {}, - "outputs": [], - "source": [ - "# add new column in dataframe on second position\n", - "strain_profile.insert(1, 'Strain phenotype','')\n", - "#strain_profile.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 169, - "id": "eb4b0c4d", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "strain_profile['Strain phenotype'] = strain_profile.apply(lambda x: (s_profiler(x)), axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 170, - "id": "86441c0f", - "metadata": {}, - "outputs": [], - "source": [ - "#strain_profile.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 171, - "id": "75698be5", - "metadata": {}, - "outputs": [], - "source": [ - "#rename headers for old name\n", - "strain_profile=strain_profile.rename(columns = {new_strain_name:old_strain_name, 'Ticarcillin/clavulanicacid':'Ticarcillin/ clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" - ] - }, - { - "cell_type": "code", - "execution_count": 172, - "id": "c14a13eb", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "#strain_profile.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 173, - "id": "1b113050", - "metadata": {}, - "outputs": [], - "source": [ - "#strain_profile" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "5ab72211", - "metadata": {}, - "outputs": [], - "source": [ - "strain_profile.to_csv(output_file,na_rep='NA',index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 175, - "id": "c17c84c4", - "metadata": {}, - "outputs": [], - "source": [ - "# Open a file with access mode 'a'\n", - "with open(output_file, \"a\") as file_object:\n", - " # Append 'hello' at the end of file\n", - " file_object.write(\"Note: \\n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, NA': Data Not Available.\\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e8e1fa8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}