Mercurial > repos > rakesh4osdd > clsi_profile
changeset 5:3c27e5c2a8e9 draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit d4b81c15919b4b87d215eaf1b792c8f251665647"
author | rakesh4osdd |
---|---|
date | Tue, 29 Jun 2021 12:58:49 +0000 |
parents | 2a5861818faf |
children | b67a3c53cc69 |
files | clsi_profile.py clsi_profile_type2_linux.ipynb |
diffstat | 2 files changed, 107 insertions(+), 74 deletions(-) [+] |
line wrap: on
line diff
--- a/clsi_profile.py Tue Jun 29 12:15:06 2021 +0000 +++ b/clsi_profile.py Tue Jun 29 12:58:49 2021 +0000 @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -# In[115]: +# In[7]: # ASIST module2 | map AST result to the CLSI breakporints with combination antibiotics @@ -11,13 +11,13 @@ import sys -# In[116]: +# In[8]: #print(pd.__version__, re.__version__) -# In[117]: +# In[9]: # compare two MIC value strings @@ -105,7 +105,7 @@ #check_mic('65','32-64','i') -# In[118]: +# In[10]: # compare MIC value in pandas list @@ -132,7 +132,7 @@ #sus_res_int(mic) -# In[119]: +# In[11]: # for input argument @@ -141,18 +141,18 @@ output_table = sys.argv[3] -# In[3]: +# In[49]: -"""#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' -input_user='test-data/input2.csv' +"""input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' +#input_user='test-data/input2.csv' input_clsi='test-data/clsi.csv' output_profile='test-data/input2_profile.csv' -output_table='test-data/input2_table.csv' -#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" +#output_table='test-data/input2_table.csv' +output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" -# In[146]: +# In[60]: # read user AST data with selected 3 columns @@ -160,7 +160,7 @@ #strain_mic -# In[147]: +# In[61]: clsi_bp=pd.read_csv(input_clsi,sep=',') @@ -168,30 +168,30 @@ #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape -# In[148]: +# In[62]: #clsi_bp #strain_mic -# In[149]: +# In[64]: +# warn user for duplicate files input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] if (input_dups.shape[0] == 0): #print( "No duplicates") pass else: - input_dups.to_csv(output_table,na_rep='NA') - with open(output_table, "a") as file_object: + with open(output_table, "w") as file_object: # Append 'hello' at the end of file - file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file') + file_object.write('S.No.,Strain name,Antibiotics,MIC\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\n') + input_dups.to_csv(output_table,na_rep='NA', mode='a') exit() -#input_dups.head() -# In[125]: +# In[17]: # convert MIC to numbers sMIC, rMIC @@ -200,13 +200,13 @@ clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) -# In[126]: +# In[18]: #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) -# In[127]: +# In[19]: # Read only numbers in MIC values @@ -216,13 +216,13 @@ # print('Waring: Error in MIC value') -# In[128]: +# In[20]: #strain_mic -# In[129]: +# In[21]: # capitalize each Antibiotic Name for comparision with removing whitespace @@ -232,7 +232,7 @@ clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") -# In[130]: +# In[22]: #find duplicate values in input files @@ -249,10 +249,10 @@ print('Waring: Error in input Values') -# In[131]: +# In[23]: -#result +dups.head() # In[132]:
--- a/clsi_profile_type2_linux.ipynb Tue Jun 29 12:15:06 2021 +0000 +++ b/clsi_profile_type2_linux.ipynb Tue Jun 29 12:58:49 2021 +0000 @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 115, + "execution_count": 7, "id": "9aa0a6f7", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 8, "id": "9af8387e", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 9, "id": "73d0783c", "metadata": {}, "outputs": [], @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 10, "id": "4d2ab1b1", "metadata": {}, "outputs": [], @@ -149,19 +149,20 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 11, "id": "0e22ef0d", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "'# for input argument\\ninput_user = sys.argv[1]\\ninput_clsi = sys.argv[2]\\noutput_table = sys.argv[3]'" - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m: list index out of range" + ] } ], "source": [ @@ -173,33 +174,22 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 49, "id": "21d5fe63", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\\ninput_user='test-data/input2.csv'\\ninput_clsi='test-data/clsi.csv'\\noutput_profile='test-data/input2_profile.csv'\\noutput_table='test-data/input2_table.csv'\\n#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "\"\"\"#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n", - "input_user='test-data/input2.csv'\n", + "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n", + "#input_user='test-data/input2.csv'\n", "input_clsi='test-data/clsi.csv'\n", "output_profile='test-data/input2_profile.csv'\n", - "output_table='test-data/input2_table.csv'\n", - "#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\"" + "#output_table='test-data/input2_table.csv'\n", + "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\"" ] }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 60, "id": "1e64b025", "metadata": {}, "outputs": [], @@ -211,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 61, "id": "0d30ddc3", "metadata": {}, "outputs": [], @@ -223,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 62, "id": "a818676d", "metadata": {}, "outputs": [], @@ -234,27 +224,27 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 64, "id": "c2aae757", "metadata": {}, "outputs": [], "source": [ + "# warn user for duplicate files\n", "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n", "if (input_dups.shape[0] == 0):\n", " #print( \"No duplicates\")\n", " pass\n", "else:\n", - " input_dups.to_csv(output_table,na_rep='NA')\n", - " with open(output_table, \"a\") as file_object:\n", + " with open(output_table, \"w\") as file_object:\n", " # Append 'hello' at the end of file\n", - " file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file')\n", - " exit()\n", - "#input_dups.head()" + " file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n", + " input_dups.to_csv(output_table,na_rep='NA', mode='a')\n", + " exit()" ] }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 17, "id": "c6b4c59b", "metadata": {}, "outputs": [], @@ -267,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 18, "id": "d0171f94", "metadata": {}, "outputs": [], @@ -277,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 19, "id": "fe45b2dd", "metadata": {}, "outputs": [], @@ -291,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 20, "id": "ddbbe4d9", "metadata": {}, "outputs": [], @@ -301,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 21, "id": "640508f1", "metadata": {}, "outputs": [], @@ -315,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 22, "id": "b87426f4", "metadata": {}, "outputs": [], @@ -336,12 +326,55 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 23, "id": "91bfc94d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Strain name</th>\n", + " <th>Antibiotics</th>\n", + " <th>MIC</th>\n", + " <th>o_mic</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Strain name, Antibiotics, MIC, o_mic]\n", + "Index: []" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#result" + "dups.head()" ] }, {