comparison xarray_netcdf2netcdf.py @ 0:810820a0d45c draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
author ecology
date Sun, 31 Jul 2022 21:23:21 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:810820a0d45c
1 #!/usr/bin/env python3
2 #
3 # Apply operations on selected variables
4 # - scale
5 # one can also select the range of time (for timeseries)
6 # to apply these operations over the range only
7 # when a range of time is selected and when scaling, one
8 # can choose to save the entire timeseries or
9 # the selected range only.
10 # when scaling, one can add additional filters on dimensions
11 # (typically used to filter over latitudes and longitudes)
12
13
14 import argparse
15 import re
16 import warnings
17 from pathlib import Path
18
19 import xarray as xr # noqa: E402
20
21
class netCDF2netCDF:
    """Subset, mask and scale variables of netCDF input and save the result.

    The work is split in two steps: ``compute()`` opens the input with
    xarray, applies the optional ``where`` condition and the dimension
    filters, and multiplies the selected variables by ``scale``;
    ``save()`` writes the outcome to ``output``.
    """

    def __init__(self, infile, varname, scale="",
                 output="output.netcdf",
                 write_all=False,
                 keep_attributes=True,
                 filter_list="",
                 where_config="",
                 other="",
                 sel=False,
                 drop=False,
                 verbose=False):
        """Store the conversion settings; no dataset is opened here.

        Parameters
        ----------
        infile : str
            Input file name, or several names separated by commas.
        varname : str or None
            Comma-separated variable names to scale. ``None`` or the
            string ``'None'`` means no variable is selected.
        scale : str, float or None
            Multiplicative factor; empty or ``None`` means 1.
        output : str or None
            Output file name; ``None`` falls back to "output.netcdf".
        write_all : bool
            Write the whole dataset instead of only ``varname``.
        keep_attributes : bool
            When true, turn on xarray's ``keep_attrs`` option.
        filter_list : list of str, str or None
            Filters of the form ``dimension#operator#value[#value]``.
            A single string is treated as one filter.
        where_config : str or None
            Path of a text file holding the where condition.
        other : str, float or None
            Replacement value where the where condition is false.
        sel : bool
            Select by coordinate value (``sel``) rather than by index.
        drop : bool
            Drop, rather than mask, values failing the where condition.
        verbose : bool
            Print the settings and progress information.
        """
        self.drop = drop
        # Path("") is the current directory, so an empty or missing path
        # has to be ruled out before touching the file system.
        self.where = ""
        if where_config and Path(where_config).is_file():
            with open(where_config) as handle:
                self.where = handle.read().replace("\n", "")
        self.other = other
        self.sel = sel
        files = infile.split(",")
        # Keep a plain string for a single file, a list for several.
        self.infile = files if len(files) > 1 else infile
        self.verbose = verbose
        if varname == 'None' or varname is None:
            self.varname = varname
        else:
            self.varname = varname.split(",")
        self.write_all = write_all
        self.keep_attributes = keep_attributes
        if self.keep_attributes:
            xr.set_options(keep_attrs=True)
        # argparse hands over None when --filter is absent; a bare string
        # would otherwise be iterated character by character.
        if not filter_list:
            self.filter = []
        elif isinstance(filter_list, str):
            self.filter = [filter_list]
        else:
            self.filter = list(filter_list)
        self.selection = {}
        self.method = {}
        if scale == "" or scale is None:
            self.scale = 1
        else:
            self.scale = float(scale)
        if output is None:
            self.output = "output.netcdf"
        else:
            self.output = output
        # initialization
        self.ds = None
        self.dset = None
        self.subset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("varname: ", self.varname)
            print("filter_list: ", self.filter)
            print("scale: ", self.scale)
            print("write_all: ", self.write_all)
            print("keep_attributes: ", self.keep_attributes)
            print("sel: ", self.sel)
            print("output: ", self.output)

    def apply_selection(self):
        """Apply the value-based selections to ``self.ds`` one by one.

        Slices are passed to ``sel`` as they are; single values are
        passed together with the lookup method recorded for them.
        """
        self.dset = self.ds
        for key, value in self.selection.items():
            if isinstance(value, slice):
                self.dset = self.dset.sel({key: value})
            else:
                self.dset = self.dset.sel(
                    {key: value},
                    method=self.method.get(key)
                )

    def dimension_selection(self, single_filter):
        """Parse one ``dimension#operator#value[#value]`` filter.

        Operators: ``sl`` (slice between two values), ``to`` (up to the
        value), ``from`` (from the value on) and ``is`` (single value).
        Values are floats when selecting by value and ints when
        selecting by index. For ``is`` in value mode the fourth field
        names the lookup method; ``None`` or a missing field leaves the
        method unset. Unknown operators are ignored.
        """
        split_filter = single_filter.split('#')
        dimension_varname = split_filter[0]
        op = split_filter[1]
        convert = float if self.sel else int
        ll = convert(split_filter[2])
        if op == 'sl':
            rl = convert(split_filter[3])
            self.selection[dimension_varname] = slice(ll, rl)
        elif op == 'to':
            self.selection[dimension_varname] = slice(None, ll)
        elif op == 'from':
            self.selection[dimension_varname] = slice(ll, None)
        elif op == 'is':
            self.selection[dimension_varname] = ll
            if self.sel:
                rl = split_filter[3] if len(split_filter) > 3 else 'None'
                if 'None' in rl:
                    self.method[dimension_varname] = None
                else:
                    self.method[dimension_varname] = rl

    def filter_selection(self):
        """Apply every filter to ``self.ds`` and scale the variables.

        The filtered dataset ends up in ``self.dset``; each variable in
        ``self.varname`` is then multiplied by ``self.scale``.
        """
        for single_filter in self.filter:
            self.dimension_selection(single_filter)

        if self.sel:
            self.apply_selection()
        else:
            self.dset = self.ds.isel(self.selection)

        if self.varname != 'None' and self.varname is not None:
            for var in self.varname:
                self.dset[var] = self.dset[var] * self.scale

    def compute(self):
        """Open the input, apply the where condition, filter and scale.

        Does nothing when ``self.dset`` is already set.

        Raises
        ------
        ValueError
            If the where condition is rejected by ``eval_where``.
        """
        if self.dset is None:
            if isinstance(self.infile, list):
                self.ds = xr.open_mfdataset(self.infile)
            else:
                self.ds = xr.open_dataset(self.infile)
            if self.where != "":
                condition = self.eval_where(self.where)
                if condition is None:
                    # A rejected condition must not reach Dataset.where
                    # as None; fail loudly instead.
                    raise ValueError(
                        "Invalid where condition: " + self.where
                    )
                if self.drop:
                    if self.verbose:
                        print("Where with drop=True")
                    self.ds = self.ds.where(condition, drop=True)
                elif self.other is not None and self.other != "":
                    if self.verbose:
                        print("Where with other=", float(self.other))
                    self.ds = self.ds.where(
                        condition,
                        other=float(self.other)
                    )
                else:
                    self.ds = self.ds.where(condition)
            self.filter_selection()
            if self.verbose:
                print(self.selection)

    def save(self):
        """Write the result to ``self.output``.

        Only the variables in ``self.varname`` are written, unless no
        variable was selected or ``write_all`` is set, in which case the
        whole dataset is written.
        """
        only_selected = (
            self.varname != 'None'
            and self.varname is not None
            and not self.write_all
        )
        if only_selected:
            self.dset[self.varname].to_netcdf(self.output)
        else:
            self.dset.to_netcdf(self.output)

    def is_float(self, element) -> bool:
        """Return True when ``float(element)`` succeeds for a string."""
        try:
            float(element)
            return True
        except ValueError:
            return False

    def eval_where(self, where_condition):
        """Turn a textual where condition into an evaluated expression.

        Names of variables and coordinates of ``self.ds`` are replaced
        by lookups on ``self.ds``. The condition is accepted only if,
        once those names and the characters ``> < = & | ( ) !`` are
        removed, every remaining token parses as a float.

        Returns the evaluated condition, or ``None`` when rejected.
        """
        names = {
            name
            for name in list(self.ds.keys()) + list(self.ds.coords.keys())
            if isinstance(name, str) and name
        }
        wcond = where_condition
        check_cond = where_condition
        if names:
            # One pass over whole tokens only, longest name first, so a
            # name contained in another name (or in the inserted text)
            # can never corrupt the expression.
            alternatives = "|".join(
                re.escape(name)
                for name in sorted(names, key=lambda n: (-len(n), n))
            )
            name_pattern = re.compile(
                r"(?<!\w)(?:" + alternatives + r")(?!\w)"
            )
            wcond = name_pattern.sub(
                lambda match: " self.ds[" + repr(match.group(0)) + "] ",
                where_condition
            )
            check_cond = name_pattern.sub("", where_condition)
        check_cond = re.sub(r"[><=&|()]", "", check_cond).replace("!", "")
        check_cond = re.sub(" +", " ", check_cond).strip()
        if not all(self.is_float(tok) for tok in check_cond.split(" ")):
            return None
        # SECURITY: eval() runs text read from the where file. The token
        # check above is the only safeguard; keep it strict.
        return eval(wcond)
201
202
if __name__ == '__main__':
    # Command-line entry point: parse the options, run the conversion
    # and write the result.
    warnings.filterwarnings("ignore")

    cli = argparse.ArgumentParser()

    # Positional arguments, in the order they must be given.
    cli.add_argument('input', help='input filename in netCDF format')
    cli.add_argument(
        'varname',
        help='Specify which variable to plot (case sensitive)'
    )

    # Zero or more filters may follow --filter.
    cli.add_argument(
        '--filter',
        nargs="*",
        help='Filter list variable#operator#value_s#value_e'
    )

    # Options taking a single value.
    value_options = (
        ('--where', 'filename with where condition to be evaluated'),
        ('--output', 'Output filename to store the resulting netCDF file'),
        ('--scale', 'scale factor to apply to selection (float)'),
        ('--other',
         'Value to use for locations where condition is False (float)'),
    )
    for option, description in value_options:
        cli.add_argument(option, help=description)

    # Boolean switches, off unless given.
    switches = (
        (("--write_all",), "write all data to netCDF"),
        (("--keep_attributes",), "Keep all attributes"),
        (("-v", "--verbose"), "switch on verbose mode"),
        (("--selection",), "select by values"),
        (("--drop",), "drop values where condition is not met"),
    )
    for names, description in switches:
        cli.add_argument(*names, help=description, action="store_true")

    options = cli.parse_args()

    print("args.selection", options.selection)
    converter = netCDF2netCDF(
        infile=options.input,
        varname=options.varname,
        scale=options.scale,
        output=options.output,
        write_all=options.write_all,
        sel=options.selection,
        keep_attributes=options.keep_attributes,
        filter_list=options.filter,
        where_config=options.where,
        drop=options.drop,
        other=options.other,
        verbose=options.verbose,
    )
    converter.compute()
    converter.save()