Mercurial > repos > rakesh4osdd > clsi_profile
comparison clsi_profile.py @ 5:3c27e5c2a8e9 draft
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit d4b81c15919b4b87d215eaf1b792c8f251665647"
author | rakesh4osdd |
---|---|
date | Tue, 29 Jun 2021 12:58:49 +0000 |
parents | 2a5861818faf |
children |
comparison
equal
deleted
inserted
replaced
4:2a5861818faf | 5:3c27e5c2a8e9 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding: utf-8 | 2 # coding: utf-8 |
3 | 3 |
4 # In[115]: | 4 # In[7]: |
5 | 5 |
6 | 6 |
7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics | 7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics |
8 # By rakesh4osdd@gmail.com, 06-Jun-2021 | 8 # By rakesh4osdd@gmail.com, 06-Jun-2021 |
9 import pandas as pd | 9 import pandas as pd |
10 import re | 10 import re |
11 import sys | 11 import sys |
12 | 12 |
13 | 13 |
14 # In[116]: | 14 # In[8]: |
15 | 15 |
16 | 16 |
17 #print(pd.__version__, re.__version__) | 17 #print(pd.__version__, re.__version__) |
18 | 18 |
19 | 19 |
20 # In[117]: | 20 # In[9]: |
21 | 21 |
22 | 22 |
23 # compare two MIC value strings | 23 # compare two MIC value strings |
24 def check_mic(mic1,mic2,mic_type): | 24 def check_mic(mic1,mic2,mic_type): |
25 #print(mic1,mic2,mic_type) | 25 #print(mic1,mic2,mic_type) |
103 return(m_type) | 103 return(m_type) |
104 | 104 |
105 #check_mic('65','32-64','i') | 105 #check_mic('65','32-64','i') |
106 | 106 |
107 | 107 |
108 # In[118]: | 108 # In[10]: |
109 | 109 |
110 | 110 |
111 # compare MIC value in pandas list | 111 # compare MIC value in pandas list |
112 def sus_res_int(mic): | 112 def sus_res_int(mic): |
113 #print(mic) | 113 #print(mic) |
130 | 130 |
131 #mic=['128','16/4','128/4','32/4-64/4'] | 131 #mic=['128','16/4','128/4','32/4-64/4'] |
132 #sus_res_int(mic) | 132 #sus_res_int(mic) |
133 | 133 |
134 | 134 |
135 # In[119]: | 135 # In[11]: |
136 | 136 |
137 | 137 |
138 # for input argument | 138 # for input argument |
139 input_user = sys.argv[1] | 139 input_user = sys.argv[1] |
140 input_clsi = sys.argv[2] | 140 input_clsi = sys.argv[2] |
141 output_table = sys.argv[3] | 141 output_table = sys.argv[3] |
142 | 142 |
143 | 143 |
144 # In[3]: | 144 # In[49]: |
145 | 145 |
146 | 146 |
147 """#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' | 147 """input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv' |
148 input_user='test-data/input2.csv' | 148 #input_user='test-data/input2.csv' |
149 input_clsi='test-data/clsi.csv' | 149 input_clsi='test-data/clsi.csv' |
150 output_profile='test-data/input2_profile.csv' | 150 output_profile='test-data/input2_profile.csv' |
151 output_table='test-data/input2_table.csv' | 151 #output_table='test-data/input2_table.csv' |
152 #output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" | 152 output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'""" |
153 | 153 |
154 | 154 |
155 # In[146]: | 155 # In[60]: |
156 | 156 |
157 | 157 |
158 # read user AST data with selected 3 columns | 158 # read user AST data with selected 3 columns |
159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) | 159 strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False) |
160 #strain_mic | 160 #strain_mic |
161 | 161 |
162 | 162 |
163 # In[147]: | 163 # In[61]: |
164 | 164 |
165 | 165 |
166 clsi_bp=pd.read_csv(input_clsi,sep=',') | 166 clsi_bp=pd.read_csv(input_clsi,sep=',') |
167 | 167 |
168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape | 168 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape |
169 | 169 |
170 | 170 |
171 # In[148]: | 171 # In[62]: |
172 | 172 |
173 | 173 |
174 #clsi_bp | 174 #clsi_bp |
175 #strain_mic | 175 #strain_mic |
176 | 176 |
177 | 177 |
178 # In[149]: | 178 # In[64]: |
179 | 179 |
180 | 180 |
181 # warn user about duplicate Strain name/Antibiotics entries in the input file | |
181 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] | 182 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()] |
182 if (input_dups.shape[0] == 0): | 183 if (input_dups.shape[0] == 0): |
183 #print( "No duplicates") | 184 #print( "No duplicates") |
184 pass | 185 pass |
185 else: | 186 else: |
186 input_dups.to_csv(output_table,na_rep='NA') | 187 with open(output_table, "w") as file_object: |
187 with open(output_table, "a") as file_object: | |
188 # Write the input-file error message to the output file | 188 # Write the input-file error message to the output file |
189 file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file') | 189 file_object.write('S.No.,Strain name,Antibiotics,MIC\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\n') |
190 input_dups.to_csv(output_table,na_rep='NA', mode='a') | |
190 exit() | 191 exit() |
191 #input_dups.head() | 192 |
192 | 193 |
193 | 194 # In[17]: |
194 # In[125]: | |
195 | 195 |
196 | 196 |
197 # convert MIC to numbers sMIC, rMIC | 197 # convert MIC to numbers sMIC, rMIC |
198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 198 clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 199 clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 200 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
201 | 201 |
202 | 202 |
203 # In[126]: | 203 # In[18]: |
204 | 204 |
205 | 205 |
206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) | 206 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x))) |
207 | 207 |
208 | 208 |
209 # In[127]: | 209 # In[19]: |
210 | 210 |
211 | 211 |
212 # Read only numbers in MIC values | 212 # Read only numbers in MIC values |
213 #try: | 213 #try: |
214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) | 214 strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\/]','', x))) |
215 #except TypeError: | 215 #except TypeError: |
216 # print('Waring: Error in MIC value') | 216 # print('Waring: Error in MIC value') |
217 | 217 |
218 | 218 |
219 # In[128]: | 219 # In[20]: |
220 | 220 |
221 | 221 |
222 #strain_mic | 222 #strain_mic |
223 | 223 |
224 | 224 |
225 # In[129]: | 225 # In[21]: |
226 | 226 |
227 | 227 |
228 # capitalize each strain and antibiotic name and remove spaces for comparison | 228 # capitalize each strain and antibiotic name and remove spaces for comparison |
229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") | 229 strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(" ","") |
230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") | 230 strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(" ","") |
231 | 231 |
232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") | 232 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","") |
233 | 233 |
234 | 234 |
235 # In[130]: | 235 # In[22]: |
236 | 236 |
237 | 237 |
238 #find duplicate values in input files | 238 #find duplicate values in input files |
239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] | 239 dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)] |
240 if dups.shape[0] != 0: | 240 if dups.shape[0] != 0: |
247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] | 247 result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']] |
248 except KeyError: | 248 except KeyError: |
249 print('Waring: Error in input Values') | 249 print('Waring: Error in input Values') |
250 | 250 |
251 | 251 |
252 # In[131]: | 252 # In[23]: |
253 | 253 |
254 | 254 |
255 #result | 255 dups.head() |
256 | 256 |
257 | 257 |
258 # In[132]: | 258 # In[132]: |
259 | 259 |
260 | 260 |