Mercurial > repos > ecology > timeseries_extraction
comparison xarray_netcdf2netcdf.py @ 0:810820a0d45c draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
author | ecology |
---|---|
date | Sun, 31 Jul 2022 21:23:21 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:810820a0d45c |
---|---|
1 #!/usr/bin/env python3 | |
2 # | |
3 # Apply operations on selected variables | |
4 # - scale | |
5 # one can also select the range of time (for timeseries) | |
6 # to apply these operations over the range only | |
7 # when a range of time is selected and when scaling, one | |
8 # can choose to save the entire timeseries or | |
9 # the selected range only. | |
10 # when scaling, one can add additional filters on dimensions | |
11 # (typically used to filter over latitudes and longitudes) | |
12 | |
13 | |
14 import argparse | |
15 import re | |
16 import warnings | |
17 from pathlib import Path | |
18 | |
19 import xarray as xr # noqa: E402 | |
20 | |
21 | |
class netCDF2netCDF:
    """Subset, mask and scale variables of netCDF input and save the result.

    Typical use is ``compute()`` followed by ``save()``.

    Parameters
    ----------
    infile : str
        Input filename; a comma-separated list of filenames is opened as
        a multi-file dataset.
    varname : str or None
        Comma-separated variable name(s) to scale (and to write when
        ``write_all`` is false). ``None`` or the string ``'None'`` means
        no variable is selected.
    scale : str, float or None
        Scale factor applied to the selected variables; empty/None -> 1.
    output : str or None
        Output filename; None -> ``"output.netcdf"``.
    write_all : bool
        Write the whole dataset instead of only the selected variables.
    keep_attributes : bool
        When true, ``xr.set_options(keep_attrs=True)`` is set globally.
    filter_list : list of str, str or None
        Filters of the form ``dimension#operator#value[#value_or_method]``
        (see ``dimension_selection``).
    where_config : str or None
        Path of a text file holding a "where" condition. A missing or
        empty path means no condition.
    other : str, float or None
        Replacement value used where the condition is false.
    sel : bool
        True: filters are coordinate values (``Dataset.sel``);
        False: filters are integer indices (``Dataset.isel``).
    drop : bool
        Drop the locations where the condition is false.
    verbose : bool
        Print the configuration and the selection.
    """

    def __init__(self, infile, varname, scale="",
                 output="output.netcdf",
                 write_all=False,
                 keep_attributes=True,
                 filter_list="",
                 where_config="",
                 other="",
                 sel=False,
                 drop=False,
                 verbose=False):
        self.drop = drop
        # where_config may be None (option not given on the command line)
        # or "" (default); Path(None) raises and Path("") is the current
        # directory, so only read it when it names a regular file.
        self.where = ""
        if where_config and Path(where_config).is_file():
            with open(where_config) as handle:
                self.where = handle.read().replace("\n", "")
        self.other = other
        self.sel = sel
        input_files = infile.split(",")
        # Keep a plain string for a single file so that compute() opens it
        # with open_dataset rather than open_mfdataset.
        self.infile = input_files if len(input_files) > 1 else infile
        self.verbose = verbose
        if varname is None or varname == 'None':
            self.varname = varname
        else:
            self.varname = varname.split(",")
        self.write_all = write_all
        self.keep_attributes = keep_attributes
        if self.keep_attributes:
            xr.set_options(keep_attrs=True)
        # Normalise the filters to a list: None (no --filter given) and ""
        # mean "no filter"; a single string is one filter, not a sequence
        # of characters.
        if filter_list is None or filter_list == "":
            self.filter = []
        elif isinstance(filter_list, str):
            self.filter = [filter_list]
        else:
            self.filter = list(filter_list)
        self.selection = {}
        self.method = {}
        if scale == "" or scale is None:
            self.scale = 1
        else:
            self.scale = float(scale)
        self.output = "output.netcdf" if output is None else output
        # initialization
        self.ds = None
        self.dset = None
        self.subset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("varname: ", self.varname)
            print("filter_list: ", self.filter)
            print("scale: ", self.scale)
            print("write_all: ", self.write_all)
            print("keep_attributes: ", self.keep_attributes)
            print("sel: ", self.sel)
            print("output: ", self.output)

    def _variables_selected(self):
        """Return True when at least one variable name was requested."""
        return self.varname is not None and self.varname != 'None'

    def apply_selection(self):
        """Apply the value-based selections to ``self.ds`` -> ``self.dset``.

        Slices are passed to ``sel`` as they are; single values are looked
        up with the method recorded for that dimension (None when no
        method was recorded).
        """
        self.dset = self.ds
        for key, value in self.selection.items():
            if isinstance(value, slice):
                self.dset = self.dset.sel({key: value})
            else:
                self.dset = self.dset.sel(
                    {key: value},
                    method=self.method.get(key)
                )

    def dimension_selection(self, single_filter):
        """Parse one filter string and record it in ``self.selection``.

        The filter has the form ``dimension#operator#value[#extra]`` where
        the operator is one of:

        - ``sl``:   slice(value, extra)
        - ``to``:   slice(None, value)
        - ``from``: slice(value, None)
        - ``is``:   single value; when selecting by value, ``extra`` is
          the lookup method (absent, empty or containing ``None`` -> no
          method).

        Values are converted with ``float`` when selecting by value
        (``self.sel``) and with ``int`` otherwise. Any other operator
        leaves the selection unchanged.

        Raises
        ------
        ValueError
            If the filter has too few fields or a value cannot be
            converted.
        """
        fields = single_filter.split('#')
        if len(fields) < 3:
            raise ValueError(
                "Invalid filter %r: expected "
                "variable#operator#value[#value]" % single_filter
            )
        dimension_varname, op = fields[0], fields[1]
        convert = float if self.sel else int
        ll = convert(fields[2])
        if op == 'sl':
            if len(fields) < 4:
                raise ValueError(
                    "Invalid filter %r: operator 'sl' needs a start and "
                    "an end value" % single_filter
                )
            self.selection[dimension_varname] = slice(ll, convert(fields[3]))
        elif op == 'to':
            self.selection[dimension_varname] = slice(None, ll)
        elif op == 'from':
            self.selection[dimension_varname] = slice(ll, None)
        elif op == 'is':
            self.selection[dimension_varname] = ll
            if self.sel:
                method = fields[3] if len(fields) > 3 else ''
                if not method or 'None' in method:
                    self.method[dimension_varname] = None
                else:
                    self.method[dimension_varname] = method

    def filter_selection(self):
        """Apply every filter to ``self.ds``, then scale the variables.

        The result is stored in ``self.dset``.
        """
        for single_filter in self.filter:
            self.dimension_selection(single_filter)

        if self.sel:
            self.apply_selection()
        else:
            self.dset = self.ds.isel(self.selection)

        if self._variables_selected():
            for var in self.varname:
                self.dset[var] = self.dset[var] * self.scale

    def compute(self):
        """Open the input, apply the where condition, filters and scaling.

        Does nothing when ``self.dset`` has already been computed.
        """
        if self.dset is not None:
            return
        if isinstance(self.infile, list):
            self.ds = xr.open_mfdataset(self.infile)
        else:
            self.ds = xr.open_dataset(self.infile)
        if self.where != "":
            condition = self.eval_where(self.where)
            if self.drop:
                if self.verbose:
                    print("Where with drop=True")
                self.ds = self.ds.where(condition, drop=True)
            elif self.other is not None and self.other != "":
                if self.verbose:
                    print("Where with other=", float(self.other))
                self.ds = self.ds.where(condition, other=float(self.other))
            else:
                self.ds = self.ds.where(condition)
        self.filter_selection()
        if self.verbose:
            print(self.selection)

    def save(self):
        """Write the result to ``self.output``.

        Only the selected variables are written unless ``write_all`` is
        set or no variable was selected.
        """
        if self._variables_selected() and not self.write_all:
            self.dset[self.varname].to_netcdf(self.output)
        else:
            self.dset.to_netcdf(self.output)

    def is_float(self, element) -> bool:
        """Return True when ``float(element)`` succeeds."""
        try:
            float(element)
            return True
        except ValueError:
            return False

    def eval_where(self, where_condition):
        """Evaluate a where condition against ``self.ds``.

        The condition may only contain names of variables/coordinates of
        ``self.ds``, the characters ``> < = & | ( ) !`` and numbers.

        Returns
        -------
        The result of evaluating the condition with every name replaced
        by the corresponding ``self.ds[name]``.

        Raises
        ------
        ValueError
            If the condition is empty or contains anything else than the
            tokens listed above.
        """
        names = set(self.ds.keys()) | set(self.ds.coords.keys())
        expression = where_condition
        leftover = where_condition
        if names:
            # One single pass with whole-name matching, longest name
            # first: a name that is part of another name (lat/latitude),
            # of a number (e in 1e5) or of the inserted text is never
            # substituted by mistake.
            alternatives = "|".join(
                re.escape(name)
                for name in sorted(names, key=len, reverse=True)
            )
            pattern = re.compile(
                r"(?<![\w.])(?:" + alternatives + r")(?!\w)"
            )
            # Item access works for any name, including names that are
            # not valid identifiers or that clash with an attribute.
            expression = pattern.sub(
                lambda match: " ds[%r] " % match.group(0),
                where_condition
            )
            leftover = pattern.sub(" ", where_condition)
        # Whatever is left after removing names and operators must be
        # numbers only; this is the guard in front of eval().
        leftover = re.sub(r"[><=&|()!]", " ", leftover)
        unexpected = [
            token for token in leftover.split()
            if not self.is_float(token)
        ]
        if unexpected or not expression.strip():
            raise ValueError(
                "Invalid where condition %r; unexpected token(s): %s"
                % (where_condition, ", ".join(unexpected))
            )
        # NOTE(security): eval() on text read from a file. The token
        # check above and the empty builtins limit what can be run, but
        # the condition file should still come from a trusted source.
        return eval(expression.strip(), {"__builtins__": {}},
                    {"ds": self.ds})
201 | |
202 | |
def main():
    """Parse the command line, run the processing and write the output."""
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        help='input filename in netCDF format'
    )
    # The variables named here are scaled and, unless --write_all is
    # given, are the only ones written to the output.
    parser.add_argument(
        'varname',
        help='Comma-separated variable name(s) to scale and write '
             '(case sensitive); "None" to select no variable'
    )
    parser.add_argument(
        '--filter',
        nargs="*",
        help='Filter list variable#operator#value_s#value_e'
    )
    parser.add_argument(
        '--where',
        help='filename with where condition to be evaluated'
    )
    parser.add_argument(
        '--output',
        help='Output filename to store the resulting netCDF file'
    )
    parser.add_argument(
        '--scale',
        help='scale factor to apply to selection (float)'
    )
    parser.add_argument(
        '--other',
        help='Value to use for locations where condition is False (float)'
    )
    parser.add_argument(
        "--write_all",
        help="write all data to netCDF",
        action="store_true")
    parser.add_argument(
        "--keep_attributes",
        help="Keep all attributes",
        action="store_true")
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true")
    parser.add_argument(
        "--selection",
        help="select by values",
        action="store_true")
    parser.add_argument(
        "--drop",
        help="drop values where condition is not met",
        action="store_true")
    args = parser.parse_args()

    print("args.selection", args.selection)
    dset = netCDF2netCDF(infile=args.input, varname=args.varname,
                         scale=args.scale, output=args.output,
                         write_all=args.write_all,
                         sel=args.selection,
                         keep_attributes=args.keep_attributes,
                         filter_list=args.filter,
                         where_config=args.where,
                         drop=args.drop, other=args.other,
                         verbose=args.verbose)
    dset.compute()
    dset.save()


if __name__ == '__main__':
    main()