comparison clsi_profile.py @ 5:3c27e5c2a8e9 draft

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit d4b81c15919b4b87d215eaf1b792c8f251665647"
author rakesh4osdd
date Tue, 29 Jun 2021 12:58:49 +0000
parents 2a5861818faf
children
comparison
equal deleted inserted replaced
4:2a5861818faf 5:3c27e5c2a8e9
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # In[115]: 4 # In[7]:
5 5
6 6
7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics
8 # By rakesh4osdd@gmail.com, 06-Jun-2021 8 # By rakesh4osdd@gmail.com, 06-Jun-2021
9 import pandas as pd 9 import pandas as pd
10 import re 10 import re
11 import sys 11 import sys
12 12
13 13
14 # In[116]: 14 # In[8]:
15 15
16 16
17 #print(pd.__version__, re.__version__) 17 #print(pd.__version__, re.__version__)
18 18
19 19
20 # In[117]: 20 # In[9]:
21 21
22 22
23 # compare two MIC value strings 23 # compare two MIC value strings
24 def check_mic(mic1,mic2,mic_type): 24 def check_mic(mic1,mic2,mic_type):
25 #print(mic1,mic2,mic_type) 25 #print(mic1,mic2,mic_type)
103 return(m_type) 103 return(m_type)
104 104
105 #check_mic('65','32-64','i') 105 #check_mic('65','32-64','i')
106 106
107 107
108 # In[118]: 108 # In[10]:
109 109
110 110
111 # compare MIC value in pandas list 111 # compare MIC value in pandas list
112 def sus_res_int(mic): 112 def sus_res_int(mic):
113 #print(mic) 113 #print(mic)
130 130
131 #mic=['128','16/4','128/4','32/4-64/4'] 131 #mic=['128','16/4','128/4','32/4-64/4']
132 #sus_res_int(mic) 132 #sus_res_int(mic)
133 133
134 134
135 # In[119]: 135 # In[11]:
136 136
137 137
138 # for input argument 138 # for input argument
139 input_user = sys.argv[1] 139 input_user = sys.argv[1]
140 input_clsi = sys.argv[2] 140 input_clsi = sys.argv[2]
141 output_table = sys.argv[3] 141 output_table = sys.argv[3]
142 142
143 143
144 # In[3]: 144 # In[49]:
145 145
146 146
147 """#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' 147 """input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'
148 input_user='test-data/input2.csv' 148 #input_user='test-data/input2.csv'
149 input_clsi='test-data/clsi.csv' 149 input_clsi='test-data/clsi.csv'
150 output_profile='test-data/input2_profile.csv' 150 output_profile='test-data/input2_profile.csv'
151 output_table='test-data/input2_table.csv' 151 #output_table='test-data/input2_table.csv'
152 #output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" 152 output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'"""
153 153
154 154
155 # In[146]: 155 # In[60]:
156 156
157 157
158 # read user AST data with selected 3 columns 158 # read user AST data with selected 3 columns
159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False)
160 #strain_mic 160 #strain_mic
161 161
162 162
163 # In[147]: 163 # In[61]:
164 164
165 165
166 clsi_bp=pd.read_csv(input_clsi,sep=',') 166 clsi_bp=pd.read_csv(input_clsi,sep=',')
167 167
168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape
169 169
170 170
171 # In[148]: 171 # In[62]:
172 172
173 173
174 #clsi_bp 174 #clsi_bp
175 #strain_mic 175 #strain_mic
176 176
177 177
178 # In[149]: 178 # In[64]:
179 179
180 180
181 # warn user about duplicate Strain name/Antibiotics rows in the input file
181 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] 182 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]
182 if (input_dups.shape[0] == 0): 183 if (input_dups.shape[0] == 0):
183 #print( "No duplicates") 184 #print( "No duplicates")
184 pass 185 pass
185 else: 186 else:
186 input_dups.to_csv(output_table,na_rep='NA') 187 with open(output_table, "w") as file_object:
187 with open(output_table, "a") as file_object:
188 # Write the duplicate-input error message to the output file 188 # Write the duplicate-input error message to the output file
189 file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file') 189 file_object.write('S.No.,Strain name,Antibiotics,MIC\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\n')
190 input_dups.to_csv(output_table,na_rep='NA', mode='a')
190 exit() 191 exit()
191 #input_dups.head() 192
192 193
193 194 # In[17]:
194 # In[125]:
195 195
196 196
197 # convert MIC to numbers sMIC, rMIC 197 # convert MIC to numbers sMIC, rMIC
198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
201 201
202 202
203 # In[126]: 203 # In[18]:
204 204
205 205
206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
207 207
208 208
209 # In[127]: 209 # In[19]:
210 210
211 211
212 # Read only numbers in MIC values 212 # Read only numbers in MIC values
213 #try: 213 #try:
214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x)))
215 #except TypeError: 215 #except TypeError:
216 # print('Waring: Error in MIC value') 216 # print('Waring: Error in MIC value')
217 217
218 218
219 # In[128]: 219 # In[20]:
220 220
221 221
222 #strain_mic 222 #strain_mic
223 223
224 224
225 # In[129]: 225 # In[21]:
226 226
227 227
228 # capitalize strain and antibiotic names and remove spaces for comparison 228 # capitalize strain and antibiotic names and remove spaces for comparison
229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") 229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","")
230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","")
231 231
232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","")
233 233
234 234
235 # In[130]: 235 # In[22]:
236 236
237 237
238 #find duplicate values in input files 238 #find duplicate values in input files
239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]
240 if dups.shape[0] != 0: 240 if dups.shape[0] != 0:
247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] 247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']]
248 except KeyError: 248 except KeyError:
249 print('Waring: Error in input Values') 249 print('Waring: Error in input Values')
250 250
251 251
252 # In[131]: 252 # In[23]:
253 253
254 254
255 #result 255 dups.head()
256 256
257 257
258 # In[132]: 258 # In[132]:
259 259
260 260