Mercurial > repos > rakesh4osdd > clsi_profile
comparison clsi_profile.py @ 5:3c27e5c2a8e9 draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit d4b81c15919b4b87d215eaf1b792c8f251665647"
| author | rakesh4osdd |
|---|---|
| date | Tue, 29 Jun 2021 12:58:49 +0000 |
| parents | 2a5861818faf |
| children |
comparison
equal
deleted
inserted
replaced
| 4:2a5861818faf | 5:3c27e5c2a8e9 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding: utf-8 | 2 # coding: utf-8 |
| 3 | 3 |
| 4 # In[115]: | 4 # In[7]: |
| 5 | 5 |
| 6 | 6 |
| 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics | 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics |
| 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 | 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 |
| 9 import pandas as pd | 9 import pandas as pd |
| 10 import re | 10 import re |
| 11 import sys | 11 import sys |
| 12 | 12 |
| 13 | 13 |
| 14 # In[116]: | 14 # In[8]: |
| 15 | 15 |
| 16 | 16 |
| 17 #print(pd.__version__, re.__version__) | 17 #print(pd.__version__, re.__version__) |
| 18 | 18 |
| 19 | 19 |
| 20 # In[117]: | 20 # In[9]: |
| 21 | 21 |
| 22 | 22 |
| 23 # compare two MIC value strings | 23 # compare two MIC value strings |
| 24 def check_mic(mic1,mic2,mic_type): | 24 def check_mic(mic1,mic2,mic_type): |
| 25 #print(mic1,mic2,mic_type) | 25 #print(mic1,mic2,mic_type) |
| 103 return(m_type) | 103 return(m_type) |
| 104 | 104 |
| 105 #check_mic('65','32-64','i') | 105 #check_mic('65','32-64','i') |
| 106 | 106 |
| 107 | 107 |
| 108 # In[118]: | 108 # In[10]: |
| 109 | 109 |
| 110 | 110 |
| 111 # compare MIC value in pandas list | 111 # compare MIC value in pandas list |
| 112 def sus_res_int(mic): | 112 def sus_res_int(mic): |
| 113 #print(mic) | 113 #print(mic) |
| 130 | 130 |
| 131 #mic=['128','16/4','128/4','32/4-64/4'] | 131 #mic=['128','16/4','128/4','32/4-64/4'] |
| 132 #sus_res_int(mic) | 132 #sus_res_int(mic) |
| 133 | 133 |
| 134 | 134 |
| 135 # In[119]: | 135 # In[11]: |
| 136 | 136 |
| 137 | 137 |
| 138 # for input argument | 138 # for input argument |
| 139 input_user = sys.argv[1] | 139 input_user = sys.argv[1] |
| 140 input_clsi = sys.argv[2] | 140 input_clsi = sys.argv[2] |
| 141 output_table = sys.argv[3] | 141 output_table = sys.argv[3] |
| 142 | 142 |
| 143 | 143 |
| 144 # In[3]: | 144 # In[49]: |
| 145 | 145 |
| 146 | 146 |
| 147 """#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' | 147 """input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' |
| 148 input_user='test-data/input2.csv' | 148 #input_user='test-data/input2.csv' |
| 149 input_clsi='test-data/clsi.csv' | 149 input_clsi='test-data/clsi.csv' |
| 150 output_profile='test-data/input2_profile.csv' | 150 output_profile='test-data/input2_profile.csv' |
| 151 output_table='test-data/input2_table.csv' | 151 #output_table='test-data/input2_table.csv' |
| 152 #output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" | 152 output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" |
| 153 | 153 |
| 154 | 154 |
| 155 # In[146]: | 155 # In[60]: |
| 156 | 156 |
| 157 | 157 |
| 158 # read user AST data with selected 3 columns | 158 # read user AST data with selected 3 columns |
| 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) | 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) |
| 160 #strain_mic | 160 #strain_mic |
| 161 | 161 |
| 162 | 162 |
| 163 # In[147]: | 163 # In[61]: |
| 164 | 164 |
| 165 | 165 |
| 166 clsi_bp=pd.read_csv(input_clsi,sep=',') | 166 clsi_bp=pd.read_csv(input_clsi,sep=',') |
| 167 | 167 |
| 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape | 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape |
| 169 | 169 |
| 170 | 170 |
| 171 # In[148]: | 171 # In[62]: |
| 172 | 172 |
| 173 | 173 |
| 174 #clsi_bp | 174 #clsi_bp |
| 175 #strain_mic | 175 #strain_mic |
| 176 | 176 |
| 177 | 177 |
| 178 # In[149]: | 178 # In[64]: |
| 179 | 179 |
| 180 | 180 |
| 181 # warn user for duplicate files | |
| 181 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] | 182 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] |
| 182 if (input_dups.shape[0] == 0): | 183 if (input_dups.shape[0] == 0): |
| 183 #print( "No duplicates") | 184 #print( "No duplicates") |
| 184 pass | 185 pass |
| 185 else: | 186 else: |
| 186 input_dups.to_csv(output_table,na_rep='NA') | 187 with open(output_table, "w") as file_object: |
| 187 with open(output_table, "a") as file_object: | |
| 188 # Write the input-file error message to the output table | 188 # Write the input-file error message to the output table |
| 189 file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file') | 189 file_object.write('S.No.,Strain name,Antibiotics,MIC\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\n') |
| 190 input_dups.to_csv(output_table,na_rep='NA', mode='a') | |
| 190 exit() | 191 exit() |
| 191 #input_dups.head() | 192 |
| 192 | 193 |
| 193 | 194 # In[17]: |
| 194 # In[125]: | |
| 195 | 195 |
| 196 | 196 |
| 197 # convert MIC to numbers sMIC, rMIC | 197 # convert MIC to numbers sMIC, rMIC |
| 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 201 | 201 |
| 202 | 202 |
| 203 # In[126]: | 203 # In[18]: |
| 204 | 204 |
| 205 | 205 |
| 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
| 207 | 207 |
| 208 | 208 |
| 209 # In[127]: | 209 # In[19]: |
| 210 | 210 |
| 211 | 211 |
| 212 # Read only numbers in MIC values | 212 # Read only numbers in MIC values |
| 213 #try: | 213 #try: |
| 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) | 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) |
| 215 #except TypeError: | 215 #except TypeError: |
| 216 # print('Waring: Error in MIC value') | 216 # print('Waring: Error in MIC value') |
| 217 | 217 |
| 218 | 218 |
| 219 # In[128]: | 219 # In[20]: |
| 220 | 220 |
| 221 | 221 |
| 222 #strain_mic | 222 #strain_mic |
| 223 | 223 |
| 224 | 224 |
| 225 # In[129]: | 225 # In[21]: |
| 226 | 226 |
| 227 | 227 |
| 228 # capitalize each name and remove whitespace for comparison | 228 # capitalize each name and remove whitespace for comparison |
| 229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") | 229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") |
| 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") | 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") |
| 231 | 231 |
| 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") | 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") |
| 233 | 233 |
| 234 | 234 |
| 235 # In[130]: | 235 # In[22]: |
| 236 | 236 |
| 237 | 237 |
| 238 #find duplicate values in input files | 238 #find duplicate values in input files |
| 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] | 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] |
| 240 if dups.shape[0] != 0: | 240 if dups.shape[0] != 0: |
| 247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] | 247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] |
| 248 except KeyError: | 248 except KeyError: |
| 249 print('Waring: Error in input Values') | 249 print('Waring: Error in input Values') |
| 250 | 250 |
| 251 | 251 |
| 252 # In[131]: | 252 # In[23]: |
| 253 | 253 |
| 254 | 254 |
| 255 #result | 255 dups.head() |
| 256 | 256 |
| 257 | 257 |
| 258 # In[132]: | 258 # In[132]: |
| 259 | 259 |
| 260 | 260 |
