# HG changeset patch # User ecology # Date 1590139155 14400 # Node ID 225d0d275a2452f289240df4575054d8d88b7d81 "planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit f1455c158011dc4aab0fd469cf794be6f4142992" diff -r 000000000000 -r 225d0d275a24 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,8 @@ +# Xarray tools for netCDF +## netCDF metadata information + +The first tool `xarray_metadata_info ` uses xarray to provide users with general information about variable names, dimensions +and attributes. +Variables that can be extracted and dimensions available are printed in a tabular file. + +The tool also print a general information file. It's the result of the xarray method info(). diff -r 000000000000 -r 225d0d275a24 test-data/Metadata_infos_from_dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc.Variables.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Metadata_infos_from_dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc.Variables.tab Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,8 @@ +VariableName NumberOfDimensions Dim0Name Dim0Size Dim1Name Dim1Size Dim2Name Dim2Size Dim3Name Dim3Size +phy 4 time 145 depth 1 latitude 97 longitude 103 +chl 4 time 145 depth 1 latitude 97 longitude 103 +nh4 4 time 145 depth 1 latitude 97 longitude 103 +time 1 time 145 +longitude 1 longitude 103 +latitude 1 latitude 97 +depth 1 depth 1 diff -r 000000000000 -r 225d0d275a24 test-data/Test1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Test1.tabular Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,146 @@ + time depth longitude latitude phy +0 2002-12-15 0.5057600140571594 -2.0000007 44.0 1.0500183 +1 2003-01-15 0.5057600140571594 -2.0000007 44.0 1.25 +2 2003-02-15 0.5057600140571594 -2.0000007 44.0 1.3000183 +3 2003-03-15 0.5057600140571594 -2.0000007 44.0 6.0599976 +4 2003-04-15 
0.5057600140571594 -2.0000007 44.0 2.25 +5 2003-05-15 0.5057600140571594 -2.0000007 44.0 0.6499939 +6 2003-06-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +7 2003-07-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +8 2003-08-15 0.5057600140571594 -2.0000007 44.0 0.480011 +9 2003-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +10 2003-10-15 0.5057600140571594 -2.0000007 44.0 0.5 +11 2003-11-15 0.5057600140571594 -2.0000007 44.0 0.9299927 +12 2003-12-15 0.5057600140571594 -2.0000007 44.0 1.3900146 +13 2004-01-15 0.5057600140571594 -2.0000007 44.0 1.7400208 +14 2004-02-15 0.5057600140571594 -2.0000007 44.0 4.5 +15 2004-03-15 0.5057600140571594 -2.0000007 44.0 5.5500183 +16 2004-04-15 0.5057600140571594 -2.0000007 44.0 5.3099976 +17 2004-05-15 0.5057600140571594 -2.0000007 44.0 3.75 +18 2004-06-15 0.5057600140571594 -2.0000007 44.0 0.77001953 +19 2004-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +20 2004-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +21 2004-09-15 0.5057600140571594 -2.0000007 44.0 0.4500122 +22 2004-10-15 0.5057600140571594 -2.0000007 44.0 0.480011 +23 2004-11-15 0.5057600140571594 -2.0000007 44.0 0.83999634 +24 2004-12-15 0.5057600140571594 -2.0000007 44.0 1.7400208 +25 2005-01-15 0.5057600140571594 -2.0000007 44.0 1.7700195 +26 2005-02-15 0.5057600140571594 -2.0000007 44.0 1.5500183 +27 2005-03-15 0.5057600140571594 -2.0000007 44.0 7.149994 +28 2005-04-15 0.5057600140571594 -2.0000007 44.0 3.649994 +29 2005-05-15 0.5057600140571594 -2.0000007 44.0 2.5200195 +30 2005-06-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +31 2005-07-15 0.5057600140571594 -2.0000007 44.0 0.6700134 +32 2005-08-15 0.5057600140571594 -2.0000007 44.0 0.4500122 +33 2005-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +34 2005-10-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +35 2005-11-15 0.5057600140571594 -2.0000007 44.0 0.6199951 +36 2005-12-15 0.5057600140571594 -2.0000007 44.0 1.1499939 +37 2006-01-15 0.5057600140571594 -2.0000007 44.0 3.5299988 
+38 2006-02-15 0.5057600140571594 -2.0000007 44.0 7.1799927 +39 2006-03-15 0.5057600140571594 -2.0000007 44.0 6.5599976 +40 2006-04-15 0.5057600140571594 -2.0000007 44.0 3.8000183 +41 2006-05-15 0.5057600140571594 -2.0000007 44.0 0.95999146 +42 2006-06-15 0.5057600140571594 -2.0000007 44.0 1.5 +43 2006-07-15 0.5057600140571594 -2.0000007 44.0 1.0299988 +44 2006-08-15 0.5057600140571594 -2.0000007 44.0 0.480011 +45 2006-09-15 0.5057600140571594 -2.0000007 44.0 0.49002075 +46 2006-10-15 0.5057600140571594 -2.0000007 44.0 0.480011 +47 2006-11-15 0.5057600140571594 -2.0000007 44.0 0.9299927 +48 2006-12-15 0.5057600140571594 -2.0000007 44.0 1.2099915 +49 2007-01-15 0.5057600140571594 -2.0000007 44.0 1.1499939 +50 2007-02-15 0.5057600140571594 -2.0000007 44.0 1.7000122 +51 2007-03-15 0.5057600140571594 -2.0000007 44.0 5.230011 +52 2007-04-15 0.5057600140571594 -2.0000007 44.0 3.8600159 +53 2007-05-15 0.5057600140571594 -2.0000007 44.0 0.83999634 +54 2007-06-15 0.5057600140571594 -2.0000007 44.0 0.6799927 +55 2007-07-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +56 2007-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +57 2007-09-15 0.5057600140571594 -2.0000007 44.0 0.5 +58 2007-10-15 0.5057600140571594 -2.0000007 44.0 0.89001465 +59 2007-11-15 0.5057600140571594 -2.0000007 44.0 2.0299988 +60 2007-12-15 0.5057600140571594 -2.0000007 44.0 1.8399963 +61 2008-01-15 0.5057600140571594 -2.0000007 44.0 1.3399963 +62 2008-02-15 0.5057600140571594 -2.0000007 44.0 3.149994 +63 2008-03-15 0.5057600140571594 -2.0000007 44.0 4.5899963 +64 2008-04-15 0.5057600140571594 -2.0000007 44.0 5.080017 +65 2008-05-15 0.5057600140571594 -2.0000007 44.0 1.0 +66 2008-06-15 0.5057600140571594 -2.0000007 44.0 1.5299988 +67 2008-07-15 0.5057600140571594 -2.0000007 44.0 0.55999756 +68 2008-08-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +69 2008-09-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +70 2008-10-15 0.5057600140571594 -2.0000007 44.0 0.42999268 +71 2008-11-15 
0.5057600140571594 -2.0000007 44.0 0.64001465 +72 2008-12-15 0.5057600140571594 -2.0000007 44.0 2.4200134 +73 2009-01-15 0.5057600140571594 -2.0000007 44.0 2.3900146 +74 2009-02-15 0.5057600140571594 -2.0000007 44.0 6.2099915 +75 2009-03-15 0.5057600140571594 -2.0000007 44.0 4.6799927 +76 2009-04-15 0.5057600140571594 -2.0000007 44.0 1.1100159 +77 2009-05-15 0.5057600140571594 -2.0000007 44.0 2.649994 +78 2009-06-15 0.5057600140571594 -2.0000007 44.0 1.4900208 +79 2009-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +80 2009-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +81 2009-09-15 0.5057600140571594 -2.0000007 44.0 0.5800171 +82 2009-10-15 0.5057600140571594 -2.0000007 44.0 0.6499939 +83 2009-11-15 0.5057600140571594 -2.0000007 44.0 0.8999939 +84 2009-12-15 0.5057600140571594 -2.0000007 44.0 1.3099976 +85 2010-01-15 0.5057600140571594 -2.0000007 44.0 1.5299988 +86 2010-02-15 0.5057600140571594 -2.0000007 44.0 2.9599915 +87 2010-03-15 0.5057600140571594 -2.0000007 44.0 5.450012 +88 2010-04-15 0.5057600140571594 -2.0000007 44.0 7.5899963 +89 2010-05-15 0.5057600140571594 -2.0000007 44.0 1.8000183 +90 2010-06-15 0.5057600140571594 -2.0000007 44.0 0.480011 +91 2010-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +92 2010-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +93 2010-09-15 0.5057600140571594 -2.0000007 44.0 0.49002075 +94 2010-10-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +95 2010-11-15 0.5057600140571594 -2.0000007 44.0 0.9299927 +96 2010-12-15 0.5057600140571594 -2.0000007 44.0 1.1499939 +97 2011-01-15 0.5057600140571594 -2.0000007 44.0 2.4900208 +98 2011-02-15 0.5057600140571594 -2.0000007 44.0 5.1799927 +99 2011-03-15 0.5057600140571594 -2.0000007 44.0 7.029999 +100 2011-04-15 0.5057600140571594 -2.0000007 44.0 2.4900208 +101 2011-05-15 0.5057600140571594 -2.0000007 44.0 0.6499939 +102 2011-06-15 0.5057600140571594 -2.0000007 44.0 0.52001953 +103 2011-07-15 0.5057600140571594 -2.0000007 44.0 0.5 +104 2011-08-15 0.5057600140571594 -2.0000007 
44.0 0.75 +105 2011-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +106 2011-10-15 0.5057600140571594 -2.0000007 44.0 0.480011 +107 2011-11-15 0.5057600140571594 -2.0000007 44.0 0.730011 +108 2011-12-15 0.5057600140571594 -2.0000007 44.0 1.0299988 +109 2012-01-15 0.5057600140571594 -2.0000007 44.0 3.149994 +110 2012-02-15 0.5057600140571594 -2.0000007 44.0 2.3099976 +111 2012-03-15 0.5057600140571594 -2.0000007 44.0 5.5200195 +112 2012-04-15 0.5057600140571594 -2.0000007 44.0 3.399994 +113 2012-05-15 0.5057600140571594 -2.0000007 44.0 3.7000122 +114 2012-06-15 0.5057600140571594 -2.0000007 44.0 2.5899963 +115 2012-07-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +116 2012-08-15 0.5057600140571594 -2.0000007 44.0 0.4500122 +117 2012-09-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +118 2012-10-15 0.5057600140571594 -2.0000007 44.0 0.61001587 +119 2012-11-15 0.5057600140571594 -2.0000007 44.0 2.0299988 +120 2012-12-15 0.5057600140571594 -2.0000007 44.0 1.4200134 +121 2013-01-15 0.5057600140571594 -2.0000007 44.0 2.2700195 +122 2013-02-15 0.5057600140571594 -2.0000007 44.0 7.0 +123 2013-03-15 0.5057600140571594 -2.0000007 44.0 10.550018 +124 2013-04-15 0.5057600140571594 -2.0000007 44.0 5.8399963 +125 2013-05-15 0.5057600140571594 -2.0000007 44.0 1.2400208 +126 2013-06-15 0.5057600140571594 -2.0000007 44.0 4.1700134 +127 2013-07-15 0.5057600140571594 -2.0000007 44.0 3.2099915 +128 2013-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +129 2013-09-15 0.5057600140571594 -2.0000007 44.0 0.480011 +130 2013-10-15 0.5057600140571594 -2.0000007 44.0 0.49002075 +131 2013-11-15 0.5057600140571594 -2.0000007 44.0 0.7799988 +132 2013-12-15 0.5057600140571594 -2.0000007 44.0 1.4500122 +133 2014-01-15 0.5057600140571594 -2.0000007 44.0 0.95999146 +134 2014-02-15 0.5057600140571594 -2.0000007 44.0 1.3900146 +135 2014-03-15 0.5057600140571594 -2.0000007 44.0 5.779999 +136 2014-04-15 0.5057600140571594 -2.0000007 44.0 5.4299927 +137 2014-05-15 0.5057600140571594 
-2.0000007 44.0 1.1799927 +138 2014-06-15 0.5057600140571594 -2.0000007 44.0 0.730011 +139 2014-07-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +140 2014-08-15 0.5057600140571594 -2.0000007 44.0 0.45999146 +141 2014-09-15 0.5057600140571594 -2.0000007 44.0 0.5 +142 2014-10-15 0.5057600140571594 -2.0000007 44.0 0.6199951 +143 2014-11-15 0.5057600140571594 -2.0000007 44.0 0.480011 +144 2014-12-15 0.5057600140571594 -2.0000007 44.0 0.55999756 diff -r 000000000000 -r 225d0d275a24 test-data/Test2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Test2.tabular Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,25 @@ + time depth latitude longitude nh4 +0 2003-12-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 81.27 +1 2003-12-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 78.08 +2 2003-12-15 0.5057600140571594 45.5 -0.9166674017906189 55.149998 +3 2004-01-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 65.2 +4 2004-01-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 64.11 +5 2004-02-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 51.0 +6 2004-02-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 51.32 +7 2004-05-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 54.53 +8 2004-06-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 79.79 +9 2004-06-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 61.52 +10 2004-07-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 99.159996 +11 2004-07-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 77.93 +12 2004-08-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 110.149994 +13 2004-08-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 86.759995 +14 2004-09-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 112.369995 +15 2004-09-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 91.979996 +16 2004-10-15 0.5057600140571594 45.166664123535156 
-0.6666674017906189 109.63 +17 2004-10-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 95.509995 +18 2004-11-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 98.45 +19 2004-11-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 93.11 +20 2004-11-15 0.5057600140571594 45.5 -0.9166674017906189 56.78 +21 2004-12-15 0.5057600140571594 45.166664123535156 -0.6666674017906189 84.25 +22 2004-12-15 0.5057600140571594 45.416664123535156 -0.8333340883255005 81.83 +23 2004-12-15 0.5057600140571594 45.5 -0.9166674017906189 57.07 diff -r 000000000000 -r 225d0d275a24 test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc has changed diff -r 000000000000 -r 225d0d275a24 test-data/info_file.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/info_file.txt Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,74 @@ +xarray.Dataset { +dimensions: + depth = 1 ; + latitude = 97 ; + longitude = 103 ; + time = 145 ; + +variables: + float32 phy(time, depth, latitude, longitude) ; + phy:_CoordinateAxes = time depth latitude longitude ; + phy:long_name = Mole Concentration of Phytoplankton expressed as carbon in sea water ; + phy:standard_name = mole_concentration_of_phytoplankton_expressed_as_carbon_in_sea_water ; + phy:units = mmol.m-3 ; + phy:unit_long = mole_concentration_of_phytoplankton_expressed_as_carbon_in_sea_water ; + datetime64[ns] time(time) ; + time:standard_name = time ; + time:long_name = time ; + time:_CoordinateAxisType = Time ; + time:axis = T ; + float32 chl(time, depth, latitude, longitude) ; + chl:_CoordinateAxes = time depth latitude longitude ; + chl:long_name = Mass Concentration of Chlorophyll in Sea Water ; + chl:standard_name = mass_concentration_of_chlorophyll_in_sea_water ; + chl:units = mg.m-3 ; + chl:unit_long = milligram of chlorophyll per cubic meter ; + float32 nh4(time, depth, latitude, longitude) ; + 
nh4:_CoordinateAxes = time depth latitude longitude ; + nh4:long_name = Mole Concentration of Ammonium in Sea Water ; + nh4:standard_name = mole_concentration_of_ammonium_in_sea_water ; + nh4:units = mmol.m-3 ; + nh4:unit_long = millimoles of Ammonium per cubic meter ; + float32 longitude(longitude) ; + longitude:long_name = Longitude ; + longitude:units = degrees_east ; + longitude:standard_name = longitude ; + longitude:axis = X ; + longitude:unit_long = Degrees East ; + longitude:step = 0.08333f ; + longitude:_CoordinateAxisType = Lon ; + float32 latitude(latitude) ; + latitude:long_name = Latitude ; + latitude:units = degrees_north ; + latitude:standard_name = latitude ; + latitude:axis = Y ; + latitude:unit_long = Degrees North ; + latitude:step = 0.08333f ; + latitude:_CoordinateAxisType = Lat ; + float32 depth(depth) ; + depth:long_name = Depth ; + depth:units = m ; + depth:axis = Z ; + depth:positive = down ; + depth:unit_long = Meters ; + depth:standard_name = depth ; + depth:_CoordinateAxisType = Height ; + depth:_CoordinateZisPositive = down ; + +// global attributes: + :title = CMEMS IBI REANALYSIS: MONTHLY BIOGEOCHEMICAL PRODUCTS (REGULAR GRID) ; + :institution = Puertos del Estado (PdE) - Mercator-Ocean (MO) ; + :references = http://marine.copernicus.eu ; + :source = CMEMS IBI-MFC ; + :Conventions = CF-1.0 ; + :history = Data extracted from dataset http://puertos2.cesga.es:8080/thredds/dodsC/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid ; + :time_min = 7272.0 ; + :time_max = 112464.0 ; + :julian_day_unit = Hours since 2002-02-15 ; + :z_min = 0.5057600140571594 ; + :z_max = 0.5057600140571594 ; + :latitude_min = 43.0 ; + :latitude_max = 51.0 ; + :longitude_min = -6.000000476837158 ; + :longitude_max = 2.4999990463256836 ; +} \ No newline at end of file diff -r 000000000000 -r 225d0d275a24 test-data/var_tab_dataset-ibi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/var_tab_dataset-ibi Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,7 @@ 
+time 1 time 145 +chl 4 time 145 depth 1 latitude 97 longitude 103 +nh4 4 time 145 depth 1 latitude 97 longitude 103 +longitude 1 longitude 103 +latitude 1 latitude 97 +depth 1 depth 1 +phy 4 time 145 depth 1 latitude 97 longitude 103 diff -r 000000000000 -r 225d0d275a24 xarray_select.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xarray_select.xml Fri May 22 05:19:15 2020 -0400 @@ -0,0 +1,298 @@ + + extracts variable values with custom conditions on dimensions + + python + netcdf4 + xarray + geopandas + shapely + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + condi_source_coord['coord_source'] == 'coord_from_file' + + + condi_source_coord['coord_source'] == 'coord_from_stdin' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + +
+
+ + , <, >=, <=, [interval], ]interval[. + + + +**Input** + +A netcdf file (.nc). + +Variable tabular file from 'Netcdf Metadata Info'. + +Tabular file with coordinates and the following structure : 'lat' 'lon'. + + +**Outputs** + +A single output with values for the wanted variable if there is only one coordinate. + +A data collection where one file is created for every coordinate, if multiple coordinates from tabular file. + + +------------------------------------------------- + +The xarray select tool can be used after the xarray Info. + ]]> +
# xarray_tool.py (file added by changeset 225d0d275a24)
#
# xarray tool for:
# - getting metadata information
# - select data and save results in csv file for further post-processing

import argparse
import csv
import itertools
import os
import warnings

import geopandas as gdp

import pandas as pd

from shapely.geometry import Point
from shapely.ops import nearest_points

import xarray as xr


def _float_or_blank(value):
    """Return *value* as a float, or "" when it is None or empty.

    The empty string is the "not provided" marker that
    ``XarrayTool.area_selection`` compares against.
    """
    if value is None or value == "":
        return ""
    return float(value)


class XarrayTool:
    """Describe a netCDF file and extract one of its variables.

    ``info`` and ``summary`` write metadata files; ``selection`` and
    ``selection_from_coords`` write the values of the variable named by
    ``select`` as tab-separated files, optionally restricted by time,
    by value/dimension filters and by a geographical area or location.
    """

    def __init__(self, infile, outfile_info="", outfile_summary="",
                 select="", outfile="", outputdir="", latname="",
                 latvalN="", latvalS="", lonname="", lonvalE="",
                 lonvalW="", filter_list="", coords="", time="",
                 verbose=False
                 ):
        """Store the tool configuration.

        Args:
            infile: netCDF file opened with ``xarray.open_dataset``.
            outfile_info: file written by ``info``.
            outfile_summary: file written by ``summary``.
            select: name of the variable to extract.
            outfile: tab-separated file written by ``selection``.
            outputdir: directory receiving one file per coordinate in
                ``selection_from_coords``.
            latname: name of the latitude dimension.
            latvalN: north latitude (or the latitude of a single point).
            latvalS: south latitude.
            lonname: name of the longitude dimension.
            lonvalE: east longitude (or the longitude of a single point).
            lonvalW: west longitude.
            filter_list: iterable of ``variable#operator#value[#value]``
                strings handled by ``rowfilter``.
            coords: tab-separated file of locations for
                ``selection_from_coords``.
            time: ``variable#operator#value[#value]`` string handled by
                ``datetime_selection``.
            verbose: print the configuration when true.

        The four latitude/longitude values are converted to float; None
        and "" are both stored as "".
        """
        self.infile = infile
        self.outfile_info = outfile_info
        self.outfile_summary = outfile_summary
        self.select = select
        self.outfile = outfile
        self.outputdir = outputdir
        self.latname = latname
        self.latvalN = _float_or_blank(latvalN)
        self.latvalS = _float_or_blank(latvalS)
        self.lonname = lonname
        self.lonvalE = _float_or_blank(lonvalE)
        self.lonvalW = _float_or_blank(lonvalW)
        self.filter = filter_list
        self.time = time
        self.coords = coords
        self.verbose = verbose
        # Populated by selection(): the selected variable (dset) and the
        # result of the geographical selection (gset).
        self.dset = None
        self.gset = None
        if self.verbose:
            settings = (
                ("infile: ", self.infile),
                ("outfile_info: ", self.outfile_info),
                ("outfile_summary: ", self.outfile_summary),
                ("outfile: ", self.outfile),
                ("select: ", self.select),
                ("outputdir: ", self.outputdir),
                ("latname: ", self.latname),
                ("latvalN: ", self.latvalN),
                ("latvalS: ", self.latvalS),
                ("lonname: ", self.lonname),
                ("lonvalE: ", self.lonvalE),
                ("lonvalW: ", self.lonvalW),
                ("filter: ", self.filter),
                ("time: ", self.time),
                ("coords: ", self.coords),
            )
            for label, value in settings:
                print(label, value)

    def info(self):
        """Write the output of ``Dataset.info`` to ``outfile_info``."""
        # Context managers close both the output file and the dataset,
        # including when reading or writing fails.
        with open(self.outfile_info, 'w') as f, \
                xr.open_dataset(self.infile) as ds:
            ds.info(f)

    def summary(self):
        """Write a tab-separated table of variables to ``outfile_summary``.

        The header holds ``VariableName``, ``NumberOfDimensions`` and one
        ``Dim<i>Name``/``Dim<i>Size`` pair per dimension of the dataset.
        Each data variable, then each coordinate, gets one row with its
        name, its number of dimensions and the name and size of each of
        its dimensions.
        """
        # newline='' is what the csv module asks for so that the writer
        # alone controls line endings.
        with open(self.outfile_summary, 'w', newline='') as f, \
                xr.open_dataset(self.infile) as ds:
            writer = csv.writer(f, delimiter='\t')
            header = ['VariableName', 'NumberOfDimensions']
            for idx in range(len(ds.dims)):
                header.append('Dim' + str(idx) + 'Name')
                header.append('Dim' + str(idx) + 'Size')
            writer.writerow(header)
            variables = itertools.chain(ds.data_vars.items(),
                                        ds.coords.items())
            for name, da in variables:
                line = [name, da.ndim]
                for dim_name, dim_size in da.sizes.items():
                    line.append(dim_name)
                    line.append(dim_size)
                writer.writerow(line)

    def rowfilter(self, single_filter):
        """Apply one ``variable#operator#value[#value]`` filter to ``dset``.

        Operators handled: ``bi`` (between the two values), ``le``,
        ``ge`` and ``e``. Any other operator leaves ``dset`` unchanged.
        When the variable is the selected one, the test is applied to the
        values with ``where``; otherwise the variable is treated as a
        dimension and the selection is done with ``sel`` (label slices,
        or the nearest label for ``e``).
        """
        split_filter = single_filter.split('#')
        filter_varname = split_filter[0]
        op = split_filter[1]
        ll = float(split_filter[2])
        # The upper bound only exists for the two-sided operator.
        rl = float(split_filter[3]) if op == 'bi' else None
        if filter_varname == self.select:
            # filter on values of the selected variable
            if op == 'bi':
                self.dset = self.dset.where((self.dset <= rl) &
                                            (self.dset >= ll))
            elif op == 'le':
                self.dset = self.dset.where(self.dset <= ll)
            elif op == 'ge':
                self.dset = self.dset.where(self.dset >= ll)
            elif op == 'e':
                self.dset = self.dset.where(self.dset == ll)
        else:
            # filter on other dimensions of the selected variable
            if op == 'bi':
                self.dset = self.dset.sel({filter_varname: slice(ll, rl)})
            elif op == 'le':
                self.dset = self.dset.sel({filter_varname: slice(None, ll)})
            elif op == 'ge':
                self.dset = self.dset.sel({filter_varname: slice(ll, None)})
            elif op == 'e':
                self.dset = self.dset.sel({filter_varname: ll},
                                          method='nearest')

    def selection(self):
        """Select the variable, restrict it and write it to ``outfile``.

        The dataset is opened and the time and filter selections are
        applied only when ``dset`` is still None, so repeated calls (see
        ``selection_from_coords``) reuse the already filtered variable.
        The geographical selection is then converted to a dataframe,
        rows containing only missing values are dropped, and the result
        is written tab-separated with a header.
        """
        if self.dset is None:
            self.ds = xr.open_dataset(self.infile)
            self.dset = self.ds[self.select]  # select variable
            if self.time:
                self.datetime_selection()
            if self.filter:
                self.filter_selection()

        self.area_selection()
        # convert to dataframe
        self.gset = self.gset.to_dataframe().dropna(how='all').reset_index()
        self.gset.to_csv(self.outfile, header=True, sep='\t')

    def datetime_selection(self):
        """Restrict ``dset`` along time from ``variable#operator#value``.

        Operators handled: ``sl`` (slice between two values, the second
        one given as a fourth field), ``to``, ``from`` and ``is``
        (nearest label). The values are passed to ``sel`` as strings.
        """
        split_filter = self.time.split('#')
        time_varname = split_filter[0]
        op = split_filter[1]
        ll = split_filter[2]
        if op == 'sl':
            rl = split_filter[3]
            self.dset = self.dset.sel({time_varname: slice(ll, rl)})
        elif op == 'to':
            self.dset = self.dset.sel({time_varname: slice(None, ll)})
        elif op == 'from':
            self.dset = self.dset.sel({time_varname: slice(ll, None)})
        elif op == 'is':
            self.dset = self.dset.sel({time_varname: ll}, method='nearest')

    def filter_selection(self):
        """Apply every filter of ``self.filter`` in order."""
        for single_filter in self.filter:
            self.rowfilter(single_filter)

    def area_selection(self):
        """Set ``gset`` to the geographical selection of ``dset``.

        * South latitude and west longitude given: select the box
          ``latvalS..latvalN`` by ``lonvalW..lonvalE`` with label slices.
        * Otherwise, north latitude and east longitude given: select the
          location found by ``nearest_location``.
        * Otherwise: keep ``dset`` unchanged.
        """
        if self.latvalS != "" and self.lonvalW != "":
            # Select geographical area
            # NOTE(review): the slices run south->north and west->east;
            # presumably both coordinates are stored in ascending order.
            self.gset = self.dset.sel({self.latname:
                                       slice(self.latvalS, self.latvalN),
                                       self.lonname:
                                       slice(self.lonvalW, self.lonvalE)})
        elif self.latvalN != "" and self.lonvalE != "":
            # select nearest location
            self.nearest_location()  # find nearest location without NaN values
            self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
                                       self.lonname: self.nearest_lonvalE},
                                      method='nearest')
        else:
            self.gset = self.dset

    def nearest_location(self):
        """Find the non-null grid point nearest to (lonvalE, latvalN).

        The result is stored in ``nearest_latvalN``/``nearest_lonvalE``.
        """
        # Build a geopandas dataframe with all first elements in each dimension
        # so we assume null values correspond to a mask that is the same for
        # all dimensions in the dataset.
        dsel_frame = self.dset
        for dim in self.dset.dims:
            if dim != self.latname and dim != self.lonname:
                dsel_frame = dsel_frame.isel({dim: 0})
        # transform to pandas dataframe
        dff = dsel_frame.to_dataframe().dropna().reset_index()
        # transform to geopandas to collocate
        gdf = gdp.GeoDataFrame(dff,
                               geometry=gdp.points_from_xy(dff[self.lonname],
                                                           dff[self.latname]))
        # Find nearest location where values are not null
        point = Point(self.lonvalE, self.latvalN)
        multipoint = gdf.geometry.unary_union
        queried_geom, nearest_geom = nearest_points(point, multipoint)
        self.nearest_latvalN = nearest_geom.y
        self.nearest_lonvalE = nearest_geom.x

    def selection_from_coords(self):
        """Run ``selection`` once per location listed in ``self.coords``.

        The file is read as tab-separated with a header row; its first
        two columns are taken as latitude and longitude. Each location
        is written to ``<outputdir>/<select>_<row index>.tabular``.

        Raises:
            ValueError: if the file has fewer than two columns or a
                coordinate is not numeric.
        """
        fcoords = pd.read_csv(self.coords, sep='\t')
        if fcoords.shape[1] < 2:
            raise ValueError(
                "coordinates file must have at least two tab-separated "
                "columns (latitude, longitude): " + str(self.coords))
        # Fall back to the current directory so that a missing or empty
        # output directory does not point at the filesystem root.
        outputdir = self.outputdir or "."
        for row in fcoords.itertuples():
            # itertuples() puts the DataFrame index at position 0, so the
            # first two data columns are at positions 1 and 2.
            self.latvalN = float(row[1])
            self.lonvalE = float(row[2])
            self.outfile = os.path.join(
                outputdir,
                self.select + '_' + str(row.Index) + '.tabular')
            self.selection()


def _build_parser():
    """Return the command-line parser of the tool."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        help='netCDF input filename'
    )
    parser.add_argument(
        '--info',
        help='Output filename where metadata information is stored'
    )
    parser.add_argument(
        '--summary',
        help='Output filename where data summary information is stored'
    )
    parser.add_argument(
        '--select',
        help='Variable name to select'
    )
    parser.add_argument(
        '--latname',
        help='Latitude name'
    )
    parser.add_argument(
        '--latvalN',
        help='North latitude value'
    )
    parser.add_argument(
        '--latvalS',
        help='South latitude value'
    )
    parser.add_argument(
        '--lonname',
        help='Longitude name'
    )
    parser.add_argument(
        '--lonvalE',
        help='East longitude value'
    )
    parser.add_argument(
        '--lonvalW',
        help='West longitude value'
    )
    parser.add_argument(
        '--coords',
        help='Input file containing Latitude and Longitude '
             'for geographical selection'
    )
    parser.add_argument(
        '--filter',
        nargs="*",
        help='Filter list variable#operator#value_s#value_e'
    )
    parser.add_argument(
        '--time',
        help='select timeseries variable#operator#value_s[#value_e]'
    )
    parser.add_argument(
        '--outfile',
        help='csv outfile for storing results of the selection '
             '(valid only when --select)'
    )
    parser.add_argument(
        '--outputdir',
        help='folder name for storing results with multiple selections '
             '(valid only when --select)'
    )
    parser.add_argument(
        "-v", "--verbose",
        help="switch on verbose mode",
        action="store_true"
    )
    return parser


def main():
    """Parse the command line and run the requested operations."""
    warnings.filterwarnings("ignore")
    args = _build_parser().parse_args()

    p = XarrayTool(args.infile, args.info, args.summary, args.select,
                   args.outfile, args.outputdir, args.latname,
                   args.latvalN, args.latvalS, args.lonname,
                   args.lonvalE, args.lonvalW, args.filter,
                   args.coords, args.time, args.verbose)
    if args.info:
        p.info()
    if args.summary:
        p.summary()
    # A coordinates file takes precedence over a single selection.
    if args.coords:
        p.selection_from_coords()
    elif args.select:
        p.selection()


if __name__ == '__main__':
    main()