Mercurial > repos > ecology > xarray_metadata_info
comparison xarray_info.py @ 5:00de53d18b99 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit fd8ad4d97db7b1fd3876ff63e14280474e06fdf7
author | ecology |
---|---|
date | Sun, 31 Jul 2022 21:22:03 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:9bbaab36a5d4 | 5:00de53d18b99 |
---|---|
1 # xarray tool for: | |
2 # - getting metadata information | |
3 # - select data and save results in csv file for further post-processing | |
4 | |
5 import argparse | |
6 import csv | |
7 import os | |
8 import warnings | |
9 | |
10 import xarray as xr | |
11 | |
12 | |
class XarrayInfo:
    """Report metadata about a netCDF file opened with xarray.

    Each public method re-opens ``infile`` on its own and writes one kind
    of output:

    - ``info()`` writes xarray's ``Dataset.info`` report to
      ``outfile_info``;
    - ``summary()`` writes a tab-separated table with one row per data
      variable and per coordinate to ``outfile_summary``;
    - ``get_coords_info()`` writes one ``<coordinate>.tabular`` file per
      coordinate into the ``coords_info`` folder.
    """

    def __init__(self, infile, outfile_info="", outfile_summary="",
                 verbose=False, coords_info=None):
        """Store the input/output locations; no file is opened here.

        Parameters
        ----------
        infile : path of the dataset passed to ``xr.open_dataset``.
        outfile_info : file written by ``info()``.
        outfile_summary : file written by ``summary()``.
        verbose : when true, echo the configured paths on stdout.
        coords_info : folder written to by ``get_coords_info()``.
        """
        self.infile = infile
        self.outfile_info = outfile_info
        self.outfile_summary = outfile_summary
        self.coords_info = coords_info
        self.verbose = verbose
        # initialization
        self.dset = None
        self.gset = None
        if self.verbose:
            print("infile: ", self.infile)
            print("outfile_info: ", self.outfile_info)
            print("outfile_summary: ", self.outfile_summary)
            print("coords_info: ", self.coords_info)

    def info(self):
        """Write the ``Dataset.info`` report of ``infile`` to ``outfile_info``."""
        # The output file is opened first (as before), so it is created even
        # when the dataset cannot be opened. Both handles are released on
        # every exit path, including when xarray raises.
        with open(self.outfile_info, 'w') as f, \
                xr.open_dataset(self.infile) as ds:
            ds.info(f)

    def summary(self):
        """Write a tab-separated dimension summary to ``outfile_summary``.

        The header is ``VariableName``, ``NumberOfDimensions`` followed by a
        ``Dim<i>Name``/``Dim<i>Size`` column pair for every dimension of the
        dataset. Data variables are listed first, then coordinates; each row
        only carries as many name/size pairs as the variable has dimensions.
        """
        # newline='' is what the csv module asks for, so that the writer's
        # own line terminator is not translated a second time.
        with open(self.outfile_summary, 'w', newline='') as f, \
                xr.open_dataset(self.infile) as ds:
            writer = csv.writer(f, delimiter='\t')
            header = ['VariableName', 'NumberOfDimensions']
            for idx in range(len(ds.dims)):
                header.append('Dim' + str(idx) + 'Name')
                header.append('Dim' + str(idx) + 'Size')
            writer.writerow(header)
            for group in (ds.data_vars, ds.coords):
                for name, da in group.items():
                    writer.writerow(self._summary_row(name, da))

    @staticmethod
    def _summary_row(name, da):
        """Build one summary row: name, rank, then (dim name, size) pairs."""
        row = [name, len(da.shape)]
        for dim_name, dim_size in zip(da.dims, da.shape):
            row.append(dim_name)
            row.append(dim_size)
        return row

    def get_coords_info(self):
        """Dump every coordinate of ``infile`` into the ``coords_info`` folder.

        One ``<coordinate name>.tabular`` file is written per coordinate,
        tab-separated and without header, with a 0-based running index in
        the first column.
        """
        if self.coords_info:
            # Create the output folder if needed so that to_csv does not
            # fail on a missing directory.
            os.makedirs(self.coords_info, exist_ok=True)
        with xr.open_dataset(self.infile) as ds:
            for c in ds.coords:
                filename = os.path.join(self.coords_info,
                                        c.strip() +
                                        '.tabular')
                # NOTE(review): len() below assumes the coordinate has at
                # least one dimension -- confirm how scalar coordinates
                # should be handled.
                values = ds.coords[c].to_pandas()
                # Replace the coordinate's own index by a running counter.
                values.index = range(len(values))
                values.to_csv(filename, header=False, sep='\t')
70 | |
71 | |
if __name__ == '__main__':
    # Command-line entry point: parse the options and produce every
    # output that was requested.
    warnings.filterwarnings("ignore")
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        help='netCDF input filename'
    )
    parser.add_argument(
        '--info',
        help='Output filename where metadata information is stored'
    )
    parser.add_argument(
        '--summary',
        help='Output filename where data summary information is stored'
    )
    parser.add_argument(
        '--coords_info',
        help='output-folder where for each coordinate, coordinate values '
             ' are being printed in the corresponding outputfile'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true"
    )
    args = parser.parse_args()

    p = XarrayInfo(args.infile, args.info, args.summary,
                   args.verbose, args.coords_info)
    # The three outputs are independent of each other: each one is written
    # whenever its option is given. (--coords_info used to be silently
    # skipped when --info was also passed.)
    if args.info:
        p.info()
    if args.coords_info:
        p.get_coords_info()
    if args.summary:
        p.summary()