comparison xarray_netcdf2netcdf.py @ 4:9bbaab36a5d4 draft

"planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit 2166974df82f97557b082a9e55135098e61640c4"
author ecology
date Thu, 20 Jan 2022 17:09:40 +0000
parents 663268794710
children
comparison
equal deleted inserted replaced
3:663268794710 4:9bbaab36a5d4
10 # when scaling, one can add additional filters on dimensions 10 # when scaling, one can add additional filters on dimensions
11 # (typically used to filter over latitudes and longitudes) 11 # (typically used to filter over latitudes and longitudes)
12 12
13 13
14 import argparse 14 import argparse
15 import re
15 import warnings 16 import warnings
17 from pathlib import Path
16 18
17 import xarray as xr # noqa: E402 19 import xarray as xr # noqa: E402
18 20
19 21
20 class netCDF2netCDF (): 22 class netCDF2netCDF ():
21 def __init__(self, infile, varname, scale="", 23 def __init__(self, infile, varname, scale="",
22 output="output.netcdf", 24 output="output.netcdf",
23 write_all=False, 25 write_all=False,
26 keep_attributes=True,
24 filter_list="", 27 filter_list="",
28 where_config="",
29 other="",
30 sel=False,
31 drop=False,
25 verbose=False): 32 verbose=False):
26 self.infile = infile 33 self.drop = drop
34 if Path(where_config).exists():
35 f = open(where_config)
36 self.where = f.read().replace("\n", "")
37 else:
38 self.where = ""
39 self.other = other
40 self.sel = sel
41 li = list(infile.split(","))
42 if len(li) > 1:
43 self.infile = li
44 else:
45 self.infile = infile
27 self.verbose = verbose 46 self.verbose = verbose
28 self.varname = varname 47 if varname == 'None' or varname is None:
48 self.varname = varname
49 else:
50 li = list(varname.split(","))
51 self.varname = li
29 self.write_all = write_all 52 self.write_all = write_all
53 self.keep_attributes = keep_attributes
54 if self.keep_attributes:
55 xr.set_options(keep_attrs=True)
30 self.filter = filter_list 56 self.filter = filter_list
31 self.selection = {} 57 self.selection = {}
58 self.method = {}
32 if scale == "" or scale is None: 59 if scale == "" or scale is None:
33 self.scale = 1 60 self.scale = 1
34 else: 61 else:
35 self.scale = float(scale) 62 self.scale = float(scale)
36 if output is None: 63 if output is None:
44 print("infile: ", self.infile) 71 print("infile: ", self.infile)
45 print("varname: ", self.varname) 72 print("varname: ", self.varname)
46 print("filter_list: ", self.filter) 73 print("filter_list: ", self.filter)
47 print("scale: ", self.scale) 74 print("scale: ", self.scale)
48 print("write_all: ", self.write_all) 75 print("write_all: ", self.write_all)
76 print("keep_attributes: ", self.keep_attributes)
77 print("sel: ", self.sel)
49 print("output: ", self.output) 78 print("output: ", self.output)
79
def apply_selection(self):
    """Select by coordinate value along every filtered dimension.

    Starts from ``self.ds`` and chains one ``.sel`` call per entry of
    ``self.selection``; the final result is stored in ``self.dset``.

    Slice selections are handed to ``.sel`` on their own, without a
    ``method`` argument.  Scalar selections are handed to ``.sel``
    together with the lookup method recorded for that dimension in
    ``self.method``.
    """
    subset = self.ds
    for dim, wanted in self.selection.items():
        if isinstance(wanted, slice):
            subset = subset.sel({dim: wanted})
        else:
            # A dimension without a recorded method falls back to None,
            # which is also the default of ``.sel``.
            subset = subset.sel(
                {dim: wanted},
                method=self.method.get(dim)
            )
    self.dset = subset
50 92
def dimension_selection(self, single_filter):
    """Parse one ``name#operator#value[#extra]`` filter string.

    The parsed selection is stored in ``self.selection[name]``:

    * ``sl``   -> ``slice(value, extra)``
    * ``to``   -> ``slice(None, value)``
    * ``from`` -> ``slice(value, None)``
    * ``is``   -> ``value``; when ``self.sel`` is set, ``extra`` names the
      lookup method, which is stored in ``self.method[name]``.  A
      missing, empty or ``None`` method is stored as ``None``.

    Values are parsed as ``float`` when ``self.sel`` is set and as ``int``
    otherwise.  Any other operator leaves the selection untouched.

    Raises:
        IndexError: if the filter has fewer fields than the operator needs.
        ValueError: if a value cannot be converted to a number.
    """
    fields = single_filter.split('#')
    dim, op = fields[0], fields[1]
    # Selecting by value works on coordinate values (floats); otherwise
    # the numbers are positional indices (ints).
    convert = float if self.sel else int
    first = convert(fields[2])
    if op == 'sl':
        self.selection[dim] = slice(first, convert(fields[3]))
    elif op == 'to':
        self.selection[dim] = slice(None, first)
    elif op == 'from':
        self.selection[dim] = slice(first, None)
    elif op == 'is':
        self.selection[dim] = first
        if self.sel:
            # The method field is optional: "lat#is#45.0" means an
            # exact match, the same as "lat#is#45.0#None".
            method = fields[3] if len(fields) > 3 else ''
            if not method or 'None' in method:
                self.method[dim] = None
            else:
                self.method[dim] = method
65 119
def filter_selection(self):
    """Apply all dimension filters, then scale the requested variables.

    Every entry of ``self.filter`` is parsed with
    ``dimension_selection``.  The accumulated selection is applied by
    value through ``apply_selection`` when ``self.sel`` is set, or by
    position with ``self.ds.isel`` otherwise; either way the result ends
    up in ``self.dset``.  Finally each variable listed in
    ``self.varname`` is multiplied by ``self.scale`` in ``self.dset``,
    unless ``self.varname`` is ``None`` or the string ``'None'``.
    """
    # --filter is declared with nargs="*" and no default, so it is None
    # when the option is not given on the command line.
    for single_filter in self.filter or ():
        self.dimension_selection(single_filter)

    if self.sel:
        self.apply_selection()
    else:
        self.dset = self.ds.isel(self.selection)

    if self.varname != 'None' and self.varname is not None:
        for var in self.varname:
            self.dset[var] = self.dset[var] * self.scale
75 134
def compute(self):
    """Open the input, apply the where-condition and the filters.

    Does nothing when ``self.dset`` is already set.  Otherwise:

    1. opens ``self.infile`` with ``xr.open_mfdataset`` when it is a list
       of paths, or with ``xr.open_dataset`` when it is a single path,
       and stores the dataset in ``self.ds``;
    2. when ``self.where`` is not empty, evaluates it with ``eval_where``
       and masks ``self.ds`` with ``Dataset.where``, using ``drop=True``
       if ``self.drop`` is set, else ``other=float(self.other)`` if
       ``self.other`` is given, else the ``where`` defaults;
    3. calls ``filter_selection`` to fill ``self.dset``.

    Raises:
        ValueError: if the where-condition is rejected by ``eval_where``,
            or if ``self.other`` is not a valid float.
    """
    if self.dset is not None:
        return

    if isinstance(self.infile, list):
        self.ds = xr.open_mfdataset(self.infile)
    else:
        self.ds = xr.open_dataset(self.infile)

    if self.where != "":
        condition = self.eval_where(self.where)
        if condition is None:
            # eval_where signals a rejected condition with None; do not
            # hand that to Dataset.where as if it were a real mask.
            raise ValueError(
                "Invalid where condition: " + repr(self.where)
            )
        if self.drop:
            if self.verbose:
                print("Where with drop=True")
            self.ds = self.ds.where(condition, drop=True)
        elif self.other is not None and self.other != "":
            fill_value = float(self.other)
            if self.verbose:
                print("Where with other=", fill_value)
            self.ds = self.ds.where(condition, other=fill_value)
        else:
            self.ds = self.ds.where(condition)

    self.filter_selection()
    if self.verbose:
        print(self.selection)
85 163
def save(self):
    """Write the computed result to ``self.output`` as netCDF.

    Only the variables listed in ``self.varname`` are written, unless
    ``self.write_all`` is set or no variable name was given
    (``self.varname`` is ``None`` or the string ``'None'``); in those
    cases all of ``self.dset`` is written.
    """
    no_varname = self.varname == 'None' or self.varname is None
    if no_varname or self.write_all:
        target = self.dset
    else:
        target = self.dset[self.varname]
    target.to_netcdf(self.output)
171
def is_float(self, element) -> bool:
    """Return True when ``float(element)`` succeeds, False otherwise.

    Values that ``float`` rejects because of their type (for example
    ``None``) count as "not a float" instead of raising.
    """
    try:
        float(element)
    except (TypeError, ValueError):
        return False
    return True
178
def eval_where(self, where_condition):
    """Turn a textual where-condition into a mask over ``self.ds``.

    Names of data variables and coordinates of ``self.ds`` found in the
    condition are replaced by lookups into the dataset.  The condition is
    only evaluated if, once those names and the characters
    ``> < = & | ( ) !`` are removed, every remaining space-separated
    token is a number according to ``is_float``.

    Names are matched as whole words in a single pass, longest first, so
    a name contained in another one (``t`` in ``lat``) cannot corrupt the
    expression.  Lookups use item access, so a variable named like a
    dataset attribute or method is still resolved as a variable.

    Since the text is evaluated as a Python expression, ``&`` and ``|``
    bind tighter than comparisons: write ``(lat > 5) & (lon < 3)``.

    Returns:
        The evaluated condition, or ``None`` when the condition is
        rejected by the check above.
    """
    names = sorted(
        set(self.ds.keys()) | set(self.ds.coords.keys()),
        key=lambda name: (-len(name), name)
    )
    expression = where_condition
    remainder = where_condition
    if names:
        name_re = re.compile(
            r"(?<!\w)(?:"
            + "|".join(re.escape(name) for name in names)
            + r")(?!\w)"
        )
        expression = name_re.sub(
            lambda found: " ds[" + repr(found.group(0)) + "] ",
            where_condition
        )
        remainder = name_re.sub("", where_condition)

    remainder = re.sub(r"[><=&|()!]", "", remainder)
    tokens = re.sub(" +", " ", remainder).strip().split(" ")
    if not all(self.is_float(token) for token in tokens):
        return None

    # SECURITY: the condition comes from a user-supplied file and is
    # passed to eval().  The token check above is the only input
    # validation; builtins are withheld and only the dataset is exposed
    # to limit what a condition that slips through could reach.
    return eval(expression, {"__builtins__": {}}, {"ds": self.ds})
91 201
92 202
93 if __name__ == '__main__': 203 if __name__ == '__main__':
94 warnings.filterwarnings("ignore") 204 warnings.filterwarnings("ignore")
95 parser = argparse.ArgumentParser() 205 parser = argparse.ArgumentParser()
105 '--filter', 215 '--filter',
106 nargs="*", 216 nargs="*",
107 help='Filter list variable#operator#value_s#value_e' 217 help='Filter list variable#operator#value_s#value_e'
108 ) 218 )
109 parser.add_argument( 219 parser.add_argument(
220 '--where',
221 help='filename with where condition to be evaluated'
222 )
223 parser.add_argument(
110 '--output', 224 '--output',
111 help='Output filename to store the resulting netCDF file' 225 help='Output filename to store the resulting netCDF file'
112 ) 226 )
113 parser.add_argument( 227 parser.add_argument(
114 '--scale', 228 '--scale',
115 help='scale factor to apply to selection (float)' 229 help='scale factor to apply to selection (float)'
116 ) 230 )
117 parser.add_argument( 231 parser.add_argument(
232 '--other',
233 help='Value to use for locations where condition is False (float)'
234 )
235 parser.add_argument(
118 "--write_all", 236 "--write_all",
119 help="write all data to netCDF", 237 help="write all data to netCDF",
120 action="store_true") 238 action="store_true")
121 parser.add_argument( 239 parser.add_argument(
240 "--keep_attributes",
241 help="Keep all attributes",
242 action="store_true")
243 parser.add_argument(
122 "-v", "--verbose", 244 "-v", "--verbose",
123 help="switch on verbose mode", 245 help="switch on verbose mode",
124 action="store_true") 246 action="store_true")
247 parser.add_argument(
248 "--selection",
249 help="select by values",
250 action="store_true")
251 parser.add_argument(
252 "--drop",
253 help="drop values where condition is not met",
254 action="store_true")
125 args = parser.parse_args() 255 args = parser.parse_args()
126 256
257 print("args.selection", args.selection)
127 dset = netCDF2netCDF(infile=args.input, varname=args.varname, 258 dset = netCDF2netCDF(infile=args.input, varname=args.varname,
128 scale=args.scale, output=args.output, 259 scale=args.scale, output=args.output,
260 write_all=args.write_all,
261 sel=args.selection,
262 keep_attributes=args.keep_attributes,
129 filter_list=args.filter, 263 filter_list=args.filter,
130 write_all=args.write_all, 264 where_config=args.where,
265 drop=args.drop, other=args.other,
131 verbose=args.verbose) 266 verbose=args.verbose)
132 dset.compute() 267 dset.compute()
133 dset.save() 268 dset.save()