Mercurial > repos > ecology > xarray_select
comparison xarray_netcdf2netcdf.py @ 3:bf595d613af4 draft
"planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit 2166974df82f97557b082a9e55135098e61640c4"
| author | ecology | 
|---|---|
| date | Thu, 20 Jan 2022 17:07:19 +0000 | 
| parents | 123a9a629bef | 
| children | |
Comparison legend:
- equal
- deleted
- inserted
- replaced
| 2:123a9a629bef | 3:bf595d613af4 | 
|---|---|
| 10 # when scaling, one can add additional filters on dimensions | 10 # when scaling, one can add additional filters on dimensions | 
| 11 # (typically used to filter over latitudes and longitudes) | 11 # (typically used to filter over latitudes and longitudes) | 
| 12 | 12 | 
| 13 | 13 | 
| 14 import argparse | 14 import argparse | 
| | 15 import re |
| 15 import warnings | 16 import warnings | 
| | 17 from pathlib import Path |
| 16 | 18 | 
| 17 import xarray as xr # noqa: E402 | 19 import xarray as xr # noqa: E402 | 
| 18 | 20 | 
| 19 | 21 | 
| 20 class netCDF2netCDF (): | 22 class netCDF2netCDF (): | 
| 21 def __init__(self, infile, varname, scale="", | 23 def __init__(self, infile, varname, scale="", | 
| 22 output="output.netcdf", | 24 output="output.netcdf", | 
| 23 write_all=False, | 25 write_all=False, | 
| | 26 keep_attributes=True, |
| 24 filter_list="", | 27 filter_list="", | 
| | 28 where_config="", |
| | 29 other="", |
| | 30 sel=False, |
| | 31 drop=False, |
| 25 verbose=False): | 32 verbose=False): | 
| 26 self.infile = infile | 33 self.drop = drop | 
| | 34 if Path(where_config).exists(): |
| | 35 f = open(where_config) |
| | 36 self.where = f.read().replace("\n", "") |
| | 37 else: |
| | 38 self.where = "" |
| | 39 self.other = other |
| | 40 self.sel = sel |
| | 41 li = list(infile.split(",")) |
| | 42 if len(li) > 1: |
| | 43 self.infile = li |
| | 44 else: |
| | 45 self.infile = infile |
| 27 self.verbose = verbose | 46 self.verbose = verbose | 
| 28 self.varname = varname | 47 if varname == 'None' or varname is None: | 
| | 48 self.varname = varname |
| | 49 else: |
| | 50 li = list(varname.split(",")) |
| | 51 self.varname = li |
| 29 self.write_all = write_all | 52 self.write_all = write_all | 
| | 53 self.keep_attributes = keep_attributes |
| | 54 if self.keep_attributes: |
| | 55 xr.set_options(keep_attrs=True) |
| 30 self.filter = filter_list | 56 self.filter = filter_list | 
| 31 self.selection = {} | 57 self.selection = {} | 
| | 58 self.method = {} |
| 32 if scale == "" or scale is None: | 59 if scale == "" or scale is None: | 
| 33 self.scale = 1 | 60 self.scale = 1 | 
| 34 else: | 61 else: | 
| 35 self.scale = float(scale) | 62 self.scale = float(scale) | 
| 36 if output is None: | 63 if output is None: | 
| 44 print("infile: ", self.infile) | 71 print("infile: ", self.infile) | 
| 45 print("varname: ", self.varname) | 72 print("varname: ", self.varname) | 
| 46 print("filter_list: ", self.filter) | 73 print("filter_list: ", self.filter) | 
| 47 print("scale: ", self.scale) | 74 print("scale: ", self.scale) | 
| 48 print("write_all: ", self.write_all) | 75 print("write_all: ", self.write_all) | 
| | 76 print("keep_attributes: ", self.keep_attributes) |
| | 77 print("sel: ", self.sel) |
| 49 print("output: ", self.output) | 78 print("output: ", self.output) | 
| | 79 |
| | 80 def apply_selection(self): |
| | 81 self.dset = self.ds |
| | 82 for key in self.selection: |
| | 83 if 'slice' in str(self.selection[key]): |
| | 84 self.dset = self.dset.sel( |
| | 85 {key: self.selection[key]} |
| | 86 ) |
| | 87 else: |
| | 88 self.dset = self.dset.sel( |
| | 89 {key: self.selection[key]}, |
| | 90 method=self.method[key] |
| | 91 ) |
| 50 | 92 | 
| 51 def dimension_selection(self, single_filter): | 93 def dimension_selection(self, single_filter): | 
| 52 split_filter = single_filter.split('#') | 94 split_filter = single_filter.split('#') | 
| 53 dimension_varname = split_filter[0] | 95 dimension_varname = split_filter[0] | 
| 54 op = split_filter[1] | 96 op = split_filter[1] | 
| 55 ll = int(split_filter[2]) | 97 if self.sel: | 
| | 98 ll = float(split_filter[2]) |
| | 99 else: |
| | 100 ll = int(split_filter[2]) |
| 56 if (op == 'sl'): | 101 if (op == 'sl'): | 
| 57 rl = int(split_filter[3]) | 102 if self.sel: | 
| | 103 rl = float(split_filter[3]) |
| | 104 else: |
| | 105 rl = int(split_filter[3]) |
| 58 self.selection[dimension_varname] = slice(ll, rl) | 106 self.selection[dimension_varname] = slice(ll, rl) | 
| 59 elif (op == 'to'): | 107 elif (op == 'to'): | 
| 60 self.selection[dimension_varname] = slice(None, ll) | 108 self.selection[dimension_varname] = slice(None, ll) | 
| 61 elif (op == 'from'): | 109 elif (op == 'from'): | 
| 62 self.selection[dimension_varname] = slice(ll, None) | 110 self.selection[dimension_varname] = slice(ll, None) | 
| 63 elif (op == 'is'): | 111 elif (op == 'is'): | 
| 64 self.selection[dimension_varname] = ll | 112 self.selection[dimension_varname] = ll | 
| | 113 if self.sel: |
| | 114 rl = split_filter[3] |
| | 115 if 'None' in rl: |
| | 116 self.method[dimension_varname] = None |
| | 117 else: |
| | 118 self.method[dimension_varname] = rl |
| 65 | 119 | 
| 66 def filter_selection(self): | 120 def filter_selection(self): | 
| 67 for single_filter in self.filter: | 121 for single_filter in self.filter: | 
| 68 self.dimension_selection(single_filter) | 122 self.dimension_selection(single_filter) | 
| 69 if self.write_all: | 123 | 
| 70 self.ds[self.varname] = \ | 124 if self.sel: | 
| 71 self.ds[self.varname].isel(self.selection)*self.scale | 125 self.apply_selection() | 
| 72 else: | 126 else: | 
| 73 self.dset = \ | 127 self.dset = \ | 
| 74 self.ds[self.varname].isel(self.selection)*self.scale | 128 self.ds.isel(self.selection) | 
| | 129 |
| | 130 if self.varname != 'None' and self.varname is not None: |
| | 131 for var in self.varname: |
| | 132 self.dset[var] = \ |
| | 133 self.dset[var]*self.scale |
| 75 | 134 | 
| 76 def compute(self): | 135 def compute(self): | 
| 77 if self.dset is None: | 136 if self.dset is None: | 
| 78 self.ds = xr.open_dataset(self.infile) | 137 if type(self.infile) is list: | 
| 79 if self.filter: | 138 self.ds = xr.open_mfdataset(self.infile) | 
| 80 self.filter_selection() | 139 else: | 
| 81 if self.verbose: | 140 self.ds = xr.open_dataset(self.infile) | 
| 82 print(self.selection) | 141 if self.where != "": | 
| 83 elif self.write_all is not None: | 142 if self.drop: | 
| 84 self.dset = self.ds[self.varname] | 143 if self.verbose: | 
| | 144 print("Where with drop=True") |
| | 145 self.ds = self.ds.where( |
| | 146 self.eval_where(self.where), |
| | 147 drop=True |
| | 148 ) |
| | 149 elif self.other is not None and self.other != "": |
| | 150 if self.verbose: |
| | 151 print("Where with other=", float(self.other)) |
| | 152 self.ds = self.ds.where( |
| | 153 self.eval_where(self.where), |
| | 154 other=float(self.other) |
| | 155 ) |
| | 156 else: |
| | 157 self.ds = self.ds.where( |
| | 158 self.eval_where(self.where) |
| | 159 ) |
| | 160 self.filter_selection() |
| | 161 if self.verbose: |
| | 162 print(self.selection) |
| 85 | 163 | 
| 86 def save(self): | 164 def save(self): | 
| 87 if self.write_all: | 165 if self.varname != 'None' and \ | 
| 88 self.ds.to_netcdf(self.output) | 166 self.varname is not None and \ | 
| | 167 not self.write_all: |
| | 168 self.dset[self.varname].to_netcdf(self.output) |
| 89 else: | 169 else: | 
| 90 self.dset.to_netcdf(self.output) | 170 self.dset.to_netcdf(self.output) | 
| | 171 |
| | 172 def is_float(self, element) -> bool: |
| | 173 try: |
| | 174 float(element) |
| | 175 return True |
| | 176 except ValueError: |
| | 177 return False |
| | 178 |
| | 179 def eval_where(self, where_condition): |
| | 180 eval_cond = None |
| | 181 list_names = list(set( |
| | 182 list(self.ds.keys()) + |
| | 183 list(self.ds.coords.keys())) |
| | 184 ) |
| | 185 wcond = where_condition |
| | 186 check_cond = where_condition |
| | 187 for var in list_names: |
| | 188 wcond = wcond.replace(var, ' self.ds.' + var + ' ') |
| | 189 check_cond = check_cond.replace(var, '') |
| | 190 to_remove = "[><=&|()]" |
| | 191 check_cond = re.sub(to_remove, "", check_cond).replace("!", "") |
| | 192 check_cond = re.sub(' +', ' ', check_cond).strip() |
| | 193 list_flt = check_cond.split(" ") |
| | 194 no_convert = False |
| | 195 for num in list_flt: |
| | 196 if not self.is_float(num): |
| | 197 no_convert = True |
| | 198 if not no_convert: |
| | 199 eval_cond = eval(wcond) |
| | 200 return eval_cond |
| 91 | 201 | 
| 92 | 202 | 
| 93 if __name__ == '__main__': | 203 if __name__ == '__main__': | 
| 94 warnings.filterwarnings("ignore") | 204 warnings.filterwarnings("ignore") | 
| 95 parser = argparse.ArgumentParser() | 205 parser = argparse.ArgumentParser() | 
| 105 '--filter', | 215 '--filter', | 
| 106 nargs="*", | 216 nargs="*", | 
| 107 help='Filter list variable#operator#value_s#value_e' | 217 help='Filter list variable#operator#value_s#value_e' | 
| 108 ) | 218 ) | 
| 109 parser.add_argument( | 219 parser.add_argument( | 
| | 220 '--where', |
| | 221 help='filename with where condition to be evaluated' |
| | 222 ) |
| | 223 parser.add_argument( |
| 110 '--output', | 224 '--output', | 
| 111 help='Output filename to store the resulting netCDF file' | 225 help='Output filename to store the resulting netCDF file' | 
| 112 ) | 226 ) | 
| 113 parser.add_argument( | 227 parser.add_argument( | 
| 114 '--scale', | 228 '--scale', | 
| 115 help='scale factor to apply to selection (float)' | 229 help='scale factor to apply to selection (float)' | 
| 116 ) | 230 ) | 
| 117 parser.add_argument( | 231 parser.add_argument( | 
| | 232 '--other', |
| | 233 help='Value to use for locations where condition is False (float)' |
| | 234 ) |
| | 235 parser.add_argument( |
| 118 "--write_all", | 236 "--write_all", | 
| 119 help="write all data to netCDF", | 237 help="write all data to netCDF", | 
| 120 action="store_true") | 238 action="store_true") | 
| 121 parser.add_argument( | 239 parser.add_argument( | 
| | 240 "--keep_attributes", |
| | 241 help="Keep all attributes", |
| | 242 action="store_true") |
| | 243 parser.add_argument( |
| 122 "-v", "--verbose", | 244 "-v", "--verbose", | 
| 123 help="switch on verbose mode", | 245 help="switch on verbose mode", | 
| 124 action="store_true") | 246 action="store_true") | 
| | 247 parser.add_argument( |
| | 248 "--selection", |
| | 249 help="select by values", |
| | 250 action="store_true") |
| | 251 parser.add_argument( |
| | 252 "--drop", |
| | 253 help="drop values where condition is not met", |
| | 254 action="store_true") |
| 125 args = parser.parse_args() | 255 args = parser.parse_args() | 
| 126 | 256 | 
| | 257 print("args.selection", args.selection) |
| 127 dset = netCDF2netCDF(infile=args.input, varname=args.varname, | 258 dset = netCDF2netCDF(infile=args.input, varname=args.varname, | 
| 128 scale=args.scale, output=args.output, | 259 scale=args.scale, output=args.output, | 
| | 260 write_all=args.write_all, |
| | 261 sel=args.selection, |
| | 262 keep_attributes=args.keep_attributes, |
| 129 filter_list=args.filter, | 263 filter_list=args.filter, | 
| 130 write_all=args.write_all, | 264 where_config=args.where, | 
| | 265 drop=args.drop, other=args.other, |
| 131 verbose=args.verbose) | 266 verbose=args.verbose) | 
| 132 dset.compute() | 267 dset.compute() | 
| 133 dset.save() | 268 dset.save() | 
