Mercurial > repos > ecology > xarray_select
comparison xarray_netcdf2netcdf.py @ 3:bf595d613af4 draft
"planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit 2166974df82f97557b082a9e55135098e61640c4"
author | ecology |
---|---|
date | Thu, 20 Jan 2022 17:07:19 +0000 |
parents | 123a9a629bef |
children |
comparison
legend: equal | deleted | inserted | replaced
2:123a9a629bef | 3:bf595d613af4 |
---|---|
10 # when scaling, one can add additional filters on dimensions | 10 # when scaling, one can add additional filters on dimensions |
11 # (typically used to filter over latitudes and longitudes) | 11 # (typically used to filter over latitudes and longitudes) |
12 | 12 |
13 | 13 |
14 import argparse | 14 import argparse |
15 import re | |
15 import warnings | 16 import warnings |
17 from pathlib import Path | |
16 | 18 |
17 import xarray as xr # noqa: E402 | 19 import xarray as xr # noqa: E402 |
18 | 20 |
19 | 21 |
20 class netCDF2netCDF (): | 22 class netCDF2netCDF (): |
21 def __init__(self, infile, varname, scale="", | 23 def __init__(self, infile, varname, scale="", |
22 output="output.netcdf", | 24 output="output.netcdf", |
23 write_all=False, | 25 write_all=False, |
26 keep_attributes=True, | |
24 filter_list="", | 27 filter_list="", |
28 where_config="", | |
29 other="", | |
30 sel=False, | |
31 drop=False, | |
25 verbose=False): | 32 verbose=False): |
26 self.infile = infile | 33 self.drop = drop |
34 if Path(where_config).exists(): | |
35 f = open(where_config) | |
36 self.where = f.read().replace("\n", "") | |
37 else: | |
38 self.where = "" | |
39 self.other = other | |
40 self.sel = sel | |
41 li = list(infile.split(",")) | |
42 if len(li) > 1: | |
43 self.infile = li | |
44 else: | |
45 self.infile = infile | |
27 self.verbose = verbose | 46 self.verbose = verbose |
28 self.varname = varname | 47 if varname == 'None' or varname is None: |
48 self.varname = varname | |
49 else: | |
50 li = list(varname.split(",")) | |
51 self.varname = li | |
29 self.write_all = write_all | 52 self.write_all = write_all |
53 self.keep_attributes = keep_attributes | |
54 if self.keep_attributes: | |
55 xr.set_options(keep_attrs=True) | |
30 self.filter = filter_list | 56 self.filter = filter_list |
31 self.selection = {} | 57 self.selection = {} |
58 self.method = {} | |
32 if scale == "" or scale is None: | 59 if scale == "" or scale is None: |
33 self.scale = 1 | 60 self.scale = 1 |
34 else: | 61 else: |
35 self.scale = float(scale) | 62 self.scale = float(scale) |
36 if output is None: | 63 if output is None: |
44 print("infile: ", self.infile) | 71 print("infile: ", self.infile) |
45 print("varname: ", self.varname) | 72 print("varname: ", self.varname) |
46 print("filter_list: ", self.filter) | 73 print("filter_list: ", self.filter) |
47 print("scale: ", self.scale) | 74 print("scale: ", self.scale) |
48 print("write_all: ", self.write_all) | 75 print("write_all: ", self.write_all) |
76 print("keep_attributes: ", self.keep_attributes) | |
77 print("sel: ", self.sel) | |
49 print("output: ", self.output) | 78 print("output: ", self.output) |
79 | |
80 def apply_selection(self): | |
81 self.dset = self.ds | |
82 for key in self.selection: | |
83 if 'slice' in str(self.selection[key]): | |
84 self.dset = self.dset.sel( | |
85 {key: self.selection[key]} | |
86 ) | |
87 else: | |
88 self.dset = self.dset.sel( | |
89 {key: self.selection[key]}, | |
90 method=self.method[key] | |
91 ) | |
50 | 92 |
51 def dimension_selection(self, single_filter): | 93 def dimension_selection(self, single_filter): |
52 split_filter = single_filter.split('#') | 94 split_filter = single_filter.split('#') |
53 dimension_varname = split_filter[0] | 95 dimension_varname = split_filter[0] |
54 op = split_filter[1] | 96 op = split_filter[1] |
55 ll = int(split_filter[2]) | 97 if self.sel: |
98 ll = float(split_filter[2]) | |
99 else: | |
100 ll = int(split_filter[2]) | |
56 if (op == 'sl'): | 101 if (op == 'sl'): |
57 rl = int(split_filter[3]) | 102 if self.sel: |
103 rl = float(split_filter[3]) | |
104 else: | |
105 rl = int(split_filter[3]) | |
58 self.selection[dimension_varname] = slice(ll, rl) | 106 self.selection[dimension_varname] = slice(ll, rl) |
59 elif (op == 'to'): | 107 elif (op == 'to'): |
60 self.selection[dimension_varname] = slice(None, ll) | 108 self.selection[dimension_varname] = slice(None, ll) |
61 elif (op == 'from'): | 109 elif (op == 'from'): |
62 self.selection[dimension_varname] = slice(ll, None) | 110 self.selection[dimension_varname] = slice(ll, None) |
63 elif (op == 'is'): | 111 elif (op == 'is'): |
64 self.selection[dimension_varname] = ll | 112 self.selection[dimension_varname] = ll |
113 if self.sel: | |
114 rl = split_filter[3] | |
115 if 'None' in rl: | |
116 self.method[dimension_varname] = None | |
117 else: | |
118 self.method[dimension_varname] = rl | |
65 | 119 |
66 def filter_selection(self): | 120 def filter_selection(self): |
67 for single_filter in self.filter: | 121 for single_filter in self.filter: |
68 self.dimension_selection(single_filter) | 122 self.dimension_selection(single_filter) |
69 if self.write_all: | 123 |
70 self.ds[self.varname] = \ | 124 if self.sel: |
71 self.ds[self.varname].isel(self.selection)*self.scale | 125 self.apply_selection() |
72 else: | 126 else: |
73 self.dset = \ | 127 self.dset = \ |
74 self.ds[self.varname].isel(self.selection)*self.scale | 128 self.ds.isel(self.selection) |
129 | |
130 if self.varname != 'None' and self.varname is not None: | |
131 for var in self.varname: | |
132 self.dset[var] = \ | |
133 self.dset[var]*self.scale | |
75 | 134 |
76 def compute(self): | 135 def compute(self): |
77 if self.dset is None: | 136 if self.dset is None: |
78 self.ds = xr.open_dataset(self.infile) | 137 if type(self.infile) is list: |
79 if self.filter: | 138 self.ds = xr.open_mfdataset(self.infile) |
80 self.filter_selection() | 139 else: |
81 if self.verbose: | 140 self.ds = xr.open_dataset(self.infile) |
82 print(self.selection) | 141 if self.where != "": |
83 elif self.write_all is not None: | 142 if self.drop: |
84 self.dset = self.ds[self.varname] | 143 if self.verbose: |
144 print("Where with drop=True") | |
145 self.ds = self.ds.where( | |
146 self.eval_where(self.where), | |
147 drop=True | |
148 ) | |
149 elif self.other is not None and self.other != "": | |
150 if self.verbose: | |
151 print("Where with other=", float(self.other)) | |
152 self.ds = self.ds.where( | |
153 self.eval_where(self.where), | |
154 other=float(self.other) | |
155 ) | |
156 else: | |
157 self.ds = self.ds.where( | |
158 self.eval_where(self.where) | |
159 ) | |
160 self.filter_selection() | |
161 if self.verbose: | |
162 print(self.selection) | |
85 | 163 |
86 def save(self): | 164 def save(self): |
87 if self.write_all: | 165 if self.varname != 'None' and \ |
88 self.ds.to_netcdf(self.output) | 166 self.varname is not None and \ |
167 not self.write_all: | |
168 self.dset[self.varname].to_netcdf(self.output) | |
89 else: | 169 else: |
90 self.dset.to_netcdf(self.output) | 170 self.dset.to_netcdf(self.output) |
171 | |
172 def is_float(self, element) -> bool: | |
173 try: | |
174 float(element) | |
175 return True | |
176 except ValueError: | |
177 return False | |
178 | |
179 def eval_where(self, where_condition): | |
180 eval_cond = None | |
181 list_names = list(set( | |
182 list(self.ds.keys()) + | |
183 list(self.ds.coords.keys())) | |
184 ) | |
185 wcond = where_condition | |
186 check_cond = where_condition | |
187 for var in list_names: | |
188 wcond = wcond.replace(var, ' self.ds.' + var + ' ') | |
189 check_cond = check_cond.replace(var, '') | |
190 to_remove = "[><=&|()]" | |
191 check_cond = re.sub(to_remove, "", check_cond).replace("!", "") | |
192 check_cond = re.sub(' +', ' ', check_cond).strip() | |
193 list_flt = check_cond.split(" ") | |
194 no_convert = False | |
195 for num in list_flt: | |
196 if not self.is_float(num): | |
197 no_convert = True | |
198 if not no_convert: | |
199 eval_cond = eval(wcond) | |
200 return eval_cond | |
91 | 201 |
92 | 202 |
93 if __name__ == '__main__': | 203 if __name__ == '__main__': |
94 warnings.filterwarnings("ignore") | 204 warnings.filterwarnings("ignore") |
95 parser = argparse.ArgumentParser() | 205 parser = argparse.ArgumentParser() |
105 '--filter', | 215 '--filter', |
106 nargs="*", | 216 nargs="*", |
107 help='Filter list variable#operator#value_s#value_e' | 217 help='Filter list variable#operator#value_s#value_e' |
108 ) | 218 ) |
109 parser.add_argument( | 219 parser.add_argument( |
220 '--where', | |
221 help='filename with where condition to be evaluated' | |
222 ) | |
223 parser.add_argument( | |
110 '--output', | 224 '--output', |
111 help='Output filename to store the resulting netCDF file' | 225 help='Output filename to store the resulting netCDF file' |
112 ) | 226 ) |
113 parser.add_argument( | 227 parser.add_argument( |
114 '--scale', | 228 '--scale', |
115 help='scale factor to apply to selection (float)' | 229 help='scale factor to apply to selection (float)' |
116 ) | 230 ) |
117 parser.add_argument( | 231 parser.add_argument( |
232 '--other', | |
233 help='Value to use for locations where condition is False (float)' | |
234 ) | |
235 parser.add_argument( | |
118 "--write_all", | 236 "--write_all", |
119 help="write all data to netCDF", | 237 help="write all data to netCDF", |
120 action="store_true") | 238 action="store_true") |
121 parser.add_argument( | 239 parser.add_argument( |
240 "--keep_attributes", | |
241 help="Keep all attributes", | |
242 action="store_true") | |
243 parser.add_argument( | |
122 "-v", "--verbose", | 244 "-v", "--verbose", |
123 help="switch on verbose mode", | 245 help="switch on verbose mode", |
124 action="store_true") | 246 action="store_true") |
247 parser.add_argument( | |
248 "--selection", | |
249 help="select by values", | |
250 action="store_true") | |
251 parser.add_argument( | |
252 "--drop", | |
253 help="drop values where condition is not met", | |
254 action="store_true") | |
125 args = parser.parse_args() | 255 args = parser.parse_args() |
126 | 256 |
257 print("args.selection", args.selection) | |
127 dset = netCDF2netCDF(infile=args.input, varname=args.varname, | 258 dset = netCDF2netCDF(infile=args.input, varname=args.varname, |
128 scale=args.scale, output=args.output, | 259 scale=args.scale, output=args.output, |
260 write_all=args.write_all, | |
261 sel=args.selection, | |
262 keep_attributes=args.keep_attributes, | |
129 filter_list=args.filter, | 263 filter_list=args.filter, |
130 write_all=args.write_all, | 264 where_config=args.where, |
265 drop=args.drop, other=args.other, | |
131 verbose=args.verbose) | 266 verbose=args.verbose) |
132 dset.compute() | 267 dset.compute() |
133 dset.save() | 268 dset.save() |