changeset 0:225d0d275a24 draft

"planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/data_manipulation/xarray/ commit f1455c158011dc4aab0fd469cf794be6f4142992"
author ecology
date Fri, 22 May 2020 05:19:15 -0400
parents
children 6baac361495b
files README.md test-data/Metadata_infos_from_dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc.Variables.tab test-data/Test1.tabular test-data/Test2.tabular test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc test-data/info_file.txt test-data/var_tab_dataset-ibi xarray_select.xml xarray_tool.py
diffstat 9 files changed, 868 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,8 @@
+# Xarray tools for netCDF
+## netCDF metadata information
+
+The first tool, `xarray_metadata_info`, uses xarray to provide users with general information about variable names, dimensions
+and attributes.
+The variables that can be extracted and the available dimensions are written to a tabular file.
+
+The tool also prints a general information file, which is the output of the xarray `info()` method.
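+
+As a minimal sketch of the underlying xarray calls (the file name is illustrative), the same information can be obtained with:
+
+```python
+import xarray as xr
+
+ds = xr.open_dataset("dataset.nc")  # illustrative file name
+
+# general information file, as written by the tool
+with open("info_file.txt", "w") as f:
+    ds.info(f)
+
+# one line per variable: name, number of dimensions, then (dim name, dim size) pairs
+for name, da in ds.data_vars.items():
+    print(name, len(da.shape), list(da.sizes.items()), sep="\t")
+```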
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Metadata_infos_from_dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc.Variables.tab	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,8 @@
+VariableName	NumberOfDimensions	Dim0Name	Dim0Size	Dim1Name	Dim1Size	Dim2Name	Dim2Size	Dim3Name	Dim3Size
+phy	4	time	145	depth	1	latitude	97	longitude	103
+chl	4	time	145	depth	1	latitude	97	longitude	103
+nh4	4	time	145	depth	1	latitude	97	longitude	103
+time	1	time	145
+longitude	1	longitude	103
+latitude	1	latitude	97
+depth	1	depth	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Test1.tabular	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,146 @@
+	time	depth	longitude	latitude	phy
+0	2002-12-15	0.5057600140571594	-2.0000007	44.0	1.0500183
+1	2003-01-15	0.5057600140571594	-2.0000007	44.0	1.25
+2	2003-02-15	0.5057600140571594	-2.0000007	44.0	1.3000183
+3	2003-03-15	0.5057600140571594	-2.0000007	44.0	6.0599976
+4	2003-04-15	0.5057600140571594	-2.0000007	44.0	2.25
+5	2003-05-15	0.5057600140571594	-2.0000007	44.0	0.6499939
+6	2003-06-15	0.5057600140571594	-2.0000007	44.0	0.42999268
+7	2003-07-15	0.5057600140571594	-2.0000007	44.0	0.42999268
+8	2003-08-15	0.5057600140571594	-2.0000007	44.0	0.480011
+9	2003-09-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+10	2003-10-15	0.5057600140571594	-2.0000007	44.0	0.5
+11	2003-11-15	0.5057600140571594	-2.0000007	44.0	0.9299927
+12	2003-12-15	0.5057600140571594	-2.0000007	44.0	1.3900146
+13	2004-01-15	0.5057600140571594	-2.0000007	44.0	1.7400208
+14	2004-02-15	0.5057600140571594	-2.0000007	44.0	4.5
+15	2004-03-15	0.5057600140571594	-2.0000007	44.0	5.5500183
+16	2004-04-15	0.5057600140571594	-2.0000007	44.0	5.3099976
+17	2004-05-15	0.5057600140571594	-2.0000007	44.0	3.75
+18	2004-06-15	0.5057600140571594	-2.0000007	44.0	0.77001953
+19	2004-07-15	0.5057600140571594	-2.0000007	44.0	0.5
+20	2004-08-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+21	2004-09-15	0.5057600140571594	-2.0000007	44.0	0.4500122
+22	2004-10-15	0.5057600140571594	-2.0000007	44.0	0.480011
+23	2004-11-15	0.5057600140571594	-2.0000007	44.0	0.83999634
+24	2004-12-15	0.5057600140571594	-2.0000007	44.0	1.7400208
+25	2005-01-15	0.5057600140571594	-2.0000007	44.0	1.7700195
+26	2005-02-15	0.5057600140571594	-2.0000007	44.0	1.5500183
+27	2005-03-15	0.5057600140571594	-2.0000007	44.0	7.149994
+28	2005-04-15	0.5057600140571594	-2.0000007	44.0	3.649994
+29	2005-05-15	0.5057600140571594	-2.0000007	44.0	2.5200195
+30	2005-06-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+31	2005-07-15	0.5057600140571594	-2.0000007	44.0	0.6700134
+32	2005-08-15	0.5057600140571594	-2.0000007	44.0	0.4500122
+33	2005-09-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+34	2005-10-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+35	2005-11-15	0.5057600140571594	-2.0000007	44.0	0.6199951
+36	2005-12-15	0.5057600140571594	-2.0000007	44.0	1.1499939
+37	2006-01-15	0.5057600140571594	-2.0000007	44.0	3.5299988
+38	2006-02-15	0.5057600140571594	-2.0000007	44.0	7.1799927
+39	2006-03-15	0.5057600140571594	-2.0000007	44.0	6.5599976
+40	2006-04-15	0.5057600140571594	-2.0000007	44.0	3.8000183
+41	2006-05-15	0.5057600140571594	-2.0000007	44.0	0.95999146
+42	2006-06-15	0.5057600140571594	-2.0000007	44.0	1.5
+43	2006-07-15	0.5057600140571594	-2.0000007	44.0	1.0299988
+44	2006-08-15	0.5057600140571594	-2.0000007	44.0	0.480011
+45	2006-09-15	0.5057600140571594	-2.0000007	44.0	0.49002075
+46	2006-10-15	0.5057600140571594	-2.0000007	44.0	0.480011
+47	2006-11-15	0.5057600140571594	-2.0000007	44.0	0.9299927
+48	2006-12-15	0.5057600140571594	-2.0000007	44.0	1.2099915
+49	2007-01-15	0.5057600140571594	-2.0000007	44.0	1.1499939
+50	2007-02-15	0.5057600140571594	-2.0000007	44.0	1.7000122
+51	2007-03-15	0.5057600140571594	-2.0000007	44.0	5.230011
+52	2007-04-15	0.5057600140571594	-2.0000007	44.0	3.8600159
+53	2007-05-15	0.5057600140571594	-2.0000007	44.0	0.83999634
+54	2007-06-15	0.5057600140571594	-2.0000007	44.0	0.6799927
+55	2007-07-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+56	2007-08-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+57	2007-09-15	0.5057600140571594	-2.0000007	44.0	0.5
+58	2007-10-15	0.5057600140571594	-2.0000007	44.0	0.89001465
+59	2007-11-15	0.5057600140571594	-2.0000007	44.0	2.0299988
+60	2007-12-15	0.5057600140571594	-2.0000007	44.0	1.8399963
+61	2008-01-15	0.5057600140571594	-2.0000007	44.0	1.3399963
+62	2008-02-15	0.5057600140571594	-2.0000007	44.0	3.149994
+63	2008-03-15	0.5057600140571594	-2.0000007	44.0	4.5899963
+64	2008-04-15	0.5057600140571594	-2.0000007	44.0	5.080017
+65	2008-05-15	0.5057600140571594	-2.0000007	44.0	1.0
+66	2008-06-15	0.5057600140571594	-2.0000007	44.0	1.5299988
+67	2008-07-15	0.5057600140571594	-2.0000007	44.0	0.55999756
+68	2008-08-15	0.5057600140571594	-2.0000007	44.0	0.42999268
+69	2008-09-15	0.5057600140571594	-2.0000007	44.0	0.42999268
+70	2008-10-15	0.5057600140571594	-2.0000007	44.0	0.42999268
+71	2008-11-15	0.5057600140571594	-2.0000007	44.0	0.64001465
+72	2008-12-15	0.5057600140571594	-2.0000007	44.0	2.4200134
+73	2009-01-15	0.5057600140571594	-2.0000007	44.0	2.3900146
+74	2009-02-15	0.5057600140571594	-2.0000007	44.0	6.2099915
+75	2009-03-15	0.5057600140571594	-2.0000007	44.0	4.6799927
+76	2009-04-15	0.5057600140571594	-2.0000007	44.0	1.1100159
+77	2009-05-15	0.5057600140571594	-2.0000007	44.0	2.649994
+78	2009-06-15	0.5057600140571594	-2.0000007	44.0	1.4900208
+79	2009-07-15	0.5057600140571594	-2.0000007	44.0	0.5
+80	2009-08-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+81	2009-09-15	0.5057600140571594	-2.0000007	44.0	0.5800171
+82	2009-10-15	0.5057600140571594	-2.0000007	44.0	0.6499939
+83	2009-11-15	0.5057600140571594	-2.0000007	44.0	0.8999939
+84	2009-12-15	0.5057600140571594	-2.0000007	44.0	1.3099976
+85	2010-01-15	0.5057600140571594	-2.0000007	44.0	1.5299988
+86	2010-02-15	0.5057600140571594	-2.0000007	44.0	2.9599915
+87	2010-03-15	0.5057600140571594	-2.0000007	44.0	5.450012
+88	2010-04-15	0.5057600140571594	-2.0000007	44.0	7.5899963
+89	2010-05-15	0.5057600140571594	-2.0000007	44.0	1.8000183
+90	2010-06-15	0.5057600140571594	-2.0000007	44.0	0.480011
+91	2010-07-15	0.5057600140571594	-2.0000007	44.0	0.5
+92	2010-08-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+93	2010-09-15	0.5057600140571594	-2.0000007	44.0	0.49002075
+94	2010-10-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+95	2010-11-15	0.5057600140571594	-2.0000007	44.0	0.9299927
+96	2010-12-15	0.5057600140571594	-2.0000007	44.0	1.1499939
+97	2011-01-15	0.5057600140571594	-2.0000007	44.0	2.4900208
+98	2011-02-15	0.5057600140571594	-2.0000007	44.0	5.1799927
+99	2011-03-15	0.5057600140571594	-2.0000007	44.0	7.029999
+100	2011-04-15	0.5057600140571594	-2.0000007	44.0	2.4900208
+101	2011-05-15	0.5057600140571594	-2.0000007	44.0	0.6499939
+102	2011-06-15	0.5057600140571594	-2.0000007	44.0	0.52001953
+103	2011-07-15	0.5057600140571594	-2.0000007	44.0	0.5
+104	2011-08-15	0.5057600140571594	-2.0000007	44.0	0.75
+105	2011-09-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+106	2011-10-15	0.5057600140571594	-2.0000007	44.0	0.480011
+107	2011-11-15	0.5057600140571594	-2.0000007	44.0	0.730011
+108	2011-12-15	0.5057600140571594	-2.0000007	44.0	1.0299988
+109	2012-01-15	0.5057600140571594	-2.0000007	44.0	3.149994
+110	2012-02-15	0.5057600140571594	-2.0000007	44.0	2.3099976
+111	2012-03-15	0.5057600140571594	-2.0000007	44.0	5.5200195
+112	2012-04-15	0.5057600140571594	-2.0000007	44.0	3.399994
+113	2012-05-15	0.5057600140571594	-2.0000007	44.0	3.7000122
+114	2012-06-15	0.5057600140571594	-2.0000007	44.0	2.5899963
+115	2012-07-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+116	2012-08-15	0.5057600140571594	-2.0000007	44.0	0.4500122
+117	2012-09-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+118	2012-10-15	0.5057600140571594	-2.0000007	44.0	0.61001587
+119	2012-11-15	0.5057600140571594	-2.0000007	44.0	2.0299988
+120	2012-12-15	0.5057600140571594	-2.0000007	44.0	1.4200134
+121	2013-01-15	0.5057600140571594	-2.0000007	44.0	2.2700195
+122	2013-02-15	0.5057600140571594	-2.0000007	44.0	7.0
+123	2013-03-15	0.5057600140571594	-2.0000007	44.0	10.550018
+124	2013-04-15	0.5057600140571594	-2.0000007	44.0	5.8399963
+125	2013-05-15	0.5057600140571594	-2.0000007	44.0	1.2400208
+126	2013-06-15	0.5057600140571594	-2.0000007	44.0	4.1700134
+127	2013-07-15	0.5057600140571594	-2.0000007	44.0	3.2099915
+128	2013-08-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+129	2013-09-15	0.5057600140571594	-2.0000007	44.0	0.480011
+130	2013-10-15	0.5057600140571594	-2.0000007	44.0	0.49002075
+131	2013-11-15	0.5057600140571594	-2.0000007	44.0	0.7799988
+132	2013-12-15	0.5057600140571594	-2.0000007	44.0	1.4500122
+133	2014-01-15	0.5057600140571594	-2.0000007	44.0	0.95999146
+134	2014-02-15	0.5057600140571594	-2.0000007	44.0	1.3900146
+135	2014-03-15	0.5057600140571594	-2.0000007	44.0	5.779999
+136	2014-04-15	0.5057600140571594	-2.0000007	44.0	5.4299927
+137	2014-05-15	0.5057600140571594	-2.0000007	44.0	1.1799927
+138	2014-06-15	0.5057600140571594	-2.0000007	44.0	0.730011
+139	2014-07-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+140	2014-08-15	0.5057600140571594	-2.0000007	44.0	0.45999146
+141	2014-09-15	0.5057600140571594	-2.0000007	44.0	0.5
+142	2014-10-15	0.5057600140571594	-2.0000007	44.0	0.6199951
+143	2014-11-15	0.5057600140571594	-2.0000007	44.0	0.480011
+144	2014-12-15	0.5057600140571594	-2.0000007	44.0	0.55999756
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Test2.tabular	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,25 @@
+	time	depth	latitude	longitude	nh4
+0	2003-12-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	81.27
+1	2003-12-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	78.08
+2	2003-12-15	0.5057600140571594	45.5	-0.9166674017906189	55.149998
+3	2004-01-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	65.2
+4	2004-01-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	64.11
+5	2004-02-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	51.0
+6	2004-02-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	51.32
+7	2004-05-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	54.53
+8	2004-06-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	79.79
+9	2004-06-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	61.52
+10	2004-07-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	99.159996
+11	2004-07-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	77.93
+12	2004-08-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	110.149994
+13	2004-08-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	86.759995
+14	2004-09-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	112.369995
+15	2004-09-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	91.979996
+16	2004-10-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	109.63
+17	2004-10-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	95.509995
+18	2004-11-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	98.45
+19	2004-11-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	93.11
+20	2004-11-15	0.5057600140571594	45.5	-0.9166674017906189	56.78
+21	2004-12-15	0.5057600140571594	45.166664123535156	-0.6666674017906189	84.25
+22	2004-12-15	0.5057600140571594	45.416664123535156	-0.8333340883255005	81.83
+23	2004-12-15	0.5057600140571594	45.5	-0.9166674017906189	57.07
Binary file test-data/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/info_file.txt	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,74 @@
+xarray.Dataset {
+dimensions:
+	depth = 1 ;
+	latitude = 97 ;
+	longitude = 103 ;
+	time = 145 ;
+
+variables:
+	float32 phy(time, depth, latitude, longitude) ;
+		phy:_CoordinateAxes = time depth latitude longitude  ;
+		phy:long_name = Mole Concentration of Phytoplankton expressed as carbon in sea water ;
+		phy:standard_name = mole_concentration_of_phytoplankton_expressed_as_carbon_in_sea_water ;
+		phy:units = mmol.m-3 ;
+		phy:unit_long = mole_concentration_of_phytoplankton_expressed_as_carbon_in_sea_water ;
+	datetime64[ns] time(time) ;
+		time:standard_name = time ;
+		time:long_name = time ;
+		time:_CoordinateAxisType = Time ;
+		time:axis = T ;
+	float32 chl(time, depth, latitude, longitude) ;
+		chl:_CoordinateAxes = time depth latitude longitude  ;
+		chl:long_name = Mass Concentration of Chlorophyll in Sea Water ;
+		chl:standard_name = mass_concentration_of_chlorophyll_in_sea_water ;
+		chl:units = mg.m-3 ;
+		chl:unit_long = milligram of chlorophyll per cubic meter ;
+	float32 nh4(time, depth, latitude, longitude) ;
+		nh4:_CoordinateAxes = time depth latitude longitude  ;
+		nh4:long_name = Mole Concentration of Ammonium in Sea Water ;
+		nh4:standard_name = mole_concentration_of_ammonium_in_sea_water ;
+		nh4:units = mmol.m-3 ;
+		nh4:unit_long = millimoles of Ammonium per cubic meter ;
+	float32 longitude(longitude) ;
+		longitude:long_name = Longitude ;
+		longitude:units = degrees_east ;
+		longitude:standard_name = longitude ;
+		longitude:axis = X ;
+		longitude:unit_long = Degrees East ;
+		longitude:step = 0.08333f ;
+		longitude:_CoordinateAxisType = Lon ;
+	float32 latitude(latitude) ;
+		latitude:long_name = Latitude ;
+		latitude:units = degrees_north ;
+		latitude:standard_name = latitude ;
+		latitude:axis = Y ;
+		latitude:unit_long = Degrees North ;
+		latitude:step = 0.08333f ;
+		latitude:_CoordinateAxisType = Lat ;
+	float32 depth(depth) ;
+		depth:long_name = Depth ;
+		depth:units = m ;
+		depth:axis = Z ;
+		depth:positive = down ;
+		depth:unit_long = Meters ;
+		depth:standard_name = depth ;
+		depth:_CoordinateAxisType = Height ;
+		depth:_CoordinateZisPositive = down ;
+
+// global attributes:
+	:title = CMEMS IBI REANALYSIS: MONTHLY BIOGEOCHEMICAL PRODUCTS (REGULAR GRID)  ;
+	:institution = Puertos del Estado (PdE) - Mercator-Ocean (MO)  ;
+	:references = http://marine.copernicus.eu ;
+	:source = CMEMS IBI-MFC ;
+	:Conventions = CF-1.0 ;
+	:history = Data extracted from dataset http://puertos2.cesga.es:8080/thredds/dodsC/dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid ;
+	:time_min = 7272.0 ;
+	:time_max = 112464.0 ;
+	:julian_day_unit = Hours since 2002-02-15 ;
+	:z_min = 0.5057600140571594 ;
+	:z_max = 0.5057600140571594 ;
+	:latitude_min = 43.0 ;
+	:latitude_max = 51.0 ;
+	:longitude_min = -6.000000476837158 ;
+	:longitude_max = 2.4999990463256836 ;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/var_tab_dataset-ibi	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,7 @@
+time	1	time	145	 	 	 	 	 	 
+chl	4	time	145	depth	1	latitude	97	longitude	103
+nh4	4	time	145	depth	1	latitude	97	longitude	103
+longitude	1	longitude	103	 	 	 	 	 	 
+latitude	1	latitude	97	 	 	 	 	 	 
+depth	1	depth	1	 	 	 	 	 	 
+phy	4	time	145	depth	1	latitude	97	longitude	103
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xarray_select.xml	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,298 @@
+<tool id="xarray_select" name="NetCDF xarray Selection" version="0.15.1">
+    <description>extracts variable values with custom conditions on dimensions</description>
+    <requirements>
+        <requirement type="package" version="3">python</requirement>
+        <requirement type="package" version="1.5.3">netcdf4</requirement>
+        <requirement type="package" version="0.15.1">xarray</requirement>
+        <requirement type="package" version="0.7.0">geopandas</requirement>
+        <requirement type="package" version="1.7.0">shapely</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+    mkdir output_dir &&
+    python '$__tool_directory__/xarray_tool.py' '$input' --select '$var'
+           --verbose
+           --filter
+    #for $i,$uc in enumerate($user_choice)
+        #if $uc.condi_between.comparator=="bi"
+             '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.t1}#${uc.condi_between.t2}'
+        #else
+             '${uc.dim}#${uc.condi_between.comparator}#${uc.condi_between.value}'
+        #end if
+    #end for
+
+    #if $time.condi_datetime.datetime=="yes"
+         --time
+        #if $time.condi_datetime.condi_between.comparator=="sl"
+             '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}#${time.condi_datetime.condi_between.t2}'
+        #else
+             '${time.condi_datetime.dim}#${time.condi_datetime.condi_between.comparator}#${time.condi_datetime.condi_between.t1}'
+        #end if
+    #end if
+
+    #if $condi_source_coord.coord_source=="coord_from_file"
+         --coords '$coord_tabular'
+         --latname '$condi_source_coord.lat_dim' --lonname '$condi_source_coord.lon_dim'
+         --outputdir output_dir
+    #else
+         --outfile 'final.tabular'
+        #if $condi_source_coord.condi_coord.coord=='single'
+           --latname $condi_source_coord.condi_coord.lat_dim
+           --latvalN $condi_source_coord.condi_coord.lat_val
+           --lonname $condi_source_coord.condi_coord.lon_dim
+           --lonvalE $condi_source_coord.condi_coord.lon_val
+        #elif $condi_source_coord.condi_coord.coord=='subregion'
+           --latname $condi_source_coord.condi_coord.lat_dim
+           --latvalN $condi_source_coord.condi_coord.lat_valN
+           --latvalS $condi_source_coord.condi_coord.lat_valS
+           --lonname $condi_source_coord.condi_coord.lon_dim
+           --lonvalE $condi_source_coord.condi_coord.lon_valE
+           --lonvalW $condi_source_coord.condi_coord.lon_valW
+        #end if
+    #end if
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" label="Input netcdf file" format="netcdf"/>
+        <param type="data" label="Tabular of variables" name="var_tab" format="tabular" help="Select the tabular file which summarize the available variables and dimensions."/>
+
+        <param name="var" type="select" label="Choose the variable to extract">
+            <options from_dataset="var_tab">
+                <column name="name" index="0"/>
+                <column name="value" index="0"/>
+            </options>
+        </param>
+
+        <conditional name="condi_source_coord">
+            <param name="coord_source" type="select" label="Source of coordinates">
+                <option value="coord_from_stdin">Manually enter coordinates</option>
+                <option value="coord_from_file">Use coordinates from input file</option>
+            </param>
+
+            <when value="coord_from_file">
+                <param type="data" label="Tabular of coord" name="coord_tabular" format="tabular" help="Format : Latitude	Longitude"/>
+                <param name="lat_dim" type="select" label="Name of latitude coordinate" >
+                    <options from_dataset="var_tab">
+                        <column name="value" index="0"/>
+                    </options>
+                </param>
+                <param name="lon_dim" type="select" label="Name of longitude coordinate" >
+                    <options from_dataset="var_tab">
+                        <column name="value" index="0"/>
+                    </options>
+                </param>
+            </when>
+
+            <when value="coord_from_stdin">
+                <conditional name="condi_coord">
+                    <param name="coord" type="select" label="Geographical area" help="Use this option to get valid values at your custom coordinates.">
+                        <option value="global">Whole available region</option>
+                        <option value="single">Single location</option>
+                        <option value="subregion">Sub-region extraction</option>
+                    </param>
+                    <when value="single">
+                        <param name="lat_dim" type="select" label="Name of latitude coordinate" >
+                            <options from_dataset="var_tab">
+                                <column name="value" index="0"/>
+                            </options>
+                        </param>
+                        <param name="lat_val" type="float" value="0" label="Latitude"/>
+                        <param name="lon_dim" type="select" label="Name of longitude coordinate" >
+                            <options from_dataset="var_tab">
+                                <column name="value" index="0"/>
+                            </options>
+                        </param>
+                        <param name="lon_val" type="float" value="0" label="Longitude"/>
+                    </when>
+                    <when value="subregion">
+                        <param name="lat_dim" type="select" label="Name of latitude coordinate" >
+                            <options from_dataset="var_tab">
+                                <column name="value" index="0"/>
+                            </options>
+                        </param>
+                        <param name="lat_valN" type="float" value="0" label="Latitude North"/>
+                        <param name="lat_valS" type="float" value="0" label="Latitude South"/>
+                        <param name="lon_dim" type="select" label="Name of longitude coordinate" >
+                            <options from_dataset="var_tab">
+                                <column name="value" index="0"/>
+                            </options>
+                        </param>
+                        <param name="lon_valE" type="float" value="0" label="Longitude East"/>
+                        <param name="lon_valW" type="float" value="0" label="Longitude West"/>
+                    </when>
+                    <when value="global"></when>
+                </conditional>
+            </when>
+
+        </conditional>
+
+        <section name="time" title="Select Time series" expanded="false">
+            <conditional name="condi_datetime">
+                <param name="datetime" type="select" label="Datetime selection" help="Use this option to extract timeseries from your dataset">
+                    <option value="no">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <when value="no"></when>
+                <when value="yes">
+                    <param name="dim" type="select" label="Select datetime dimension" help="Use this option only if your dataset contains a date/time dimension">
+                        <options from_dataset="var_tab">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <conditional name="condi_between">
+                        <param name="comparator" type="select" label="date/Time selection">
+                            <option value="is">is</option>
+                            <option value="sl">slice</option>
+                            <option value="ne">nearest</option>
+                            <option value="to">to</option>
+                            <option value="from">from</option>
+                        </param>
+                        <when value="sl">
+                            <param name="t1" type="text" value="" label="Start date/time"/>
+                            <param name="t2" type="text" value="" label="End date/time"/>
+                        </when>
+                        <when value="is">
+                            <param name="t1" type="text" value="" label="date/time"/>
+                        </when>
+                        <when value="ne">
+                            <param name="t1" type="text" value="" label="date/time"/>
+                        </when>
+                        <when value="to">
+                            <param name="t1" type="text" value="" label="date/time"/>
+                        </when>
+                        <when value="from">
+                            <param name="t1" type="text" value="" label="date/time"/>
+                        </when>
+                    </conditional>
+                </when>
+            </conditional>
+        </section>
+        <repeat name="user_choice" title="additional filter" help="Use this option to filter on the selected dataset">
+            <param name="dim" type="select" label="Dimensions">
+                <options from_dataset="var_tab">
+                    <column name="value" index="0"/>
+                </options>
+            </param>
+            <conditional name="condi_between">
+                <param name="comparator" type="select" label="Comparator">
+                    <option value="e">Equal</option>
+                    <option value="ge">Greater or equal</option>
+                    <option value="le">Less or equal</option>
+                    <option value="bi">Between-include [threshold1,threshold2]</option>
+                </param>
+                <when value="bi">
+                    <param name="t1" type="text" value="0" label="Inferior threshold"/>
+                    <param name="t2" type="text" value="0" label="Superior threshold"/>
+                </when>
+                <when value="e">
+                    <param name="value" type="text" value="0" label="Value"/>
+                </when>
+                <when value="ge">
+                    <param name="value" type="text" value="0" label="Value"/>
+                </when>
+                <when value="le">
+                    <param name="value" type="text" value="0" label="Value"/>
+                </when>
+            </conditional>
+        </repeat>
+    </inputs>
+    <outputs>
+        <collection type="list" name="output">
+            <discover_datasets pattern="__designation_and_ext__" visible="false" format="tabular" directory="output_dir"/>
+            <filter>condi_source_coord['coord_source'] == 'coord_from_file'</filter>
+        </collection>
+        <data name="simpleoutput" from_work_dir="final.tabular" format="tabular">
+            <filter>condi_source_coord['coord_source'] == 'coord_from_stdin'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+             <param name="input" value="dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc"/>
+             <param name="var" value="phy"/>
+             <param name="var_tab" value="var_tab_dataset-ibi"/>
+             <conditional name="condi_source_coord">
+                 <param name="coord_source" value="coord_from_stdin"/>
+                  <conditional name="condi_coord">
+                    <param name="coord" value="single"/>
+                    <param name="lat_dim" value="latitude"/>
+                    <param name="lat_val" value="44.0"/>
+                    <param name="lon_dim" value="longitude"/>
+                    <param name="lon_val" value="-2.0"/>
+                  </conditional>
+             </conditional>
+             <output name="simpleoutput" value="Test1.tabular">
+                 <assert_contents>
+                     <has_text_matching expression="0\t2002-12-15\t0.5"/>
+                     <has_text_matching expression="144\t2014-12-15\t0.5"/>
+                 </assert_contents>
+             </output>
+        </test>
+        <test>
+             <param name="input" value="dataset-ibi-reanalysis-bio-005-003-monthly-regulargrid_1510914389133.nc"/>
+             <param name="var_tab" value="var_tab_dataset-ibi"/>
+             <param name="var" value="nh4"/>
+
+             <conditional name="condi_source_coord">
+                 <param name="coord_source" value="coord_from_stdin"/>
+                  <conditional name="condi_coord">
+                    <param name="coord" value="global"/>
+                  </conditional>
+             </conditional>
+             <section name="time">
+                 <conditional name="condi_datetime">
+                     <param name="datetime" value="yes"/>
+                     <conditional name="condi_between">
+                         <param name="comparator" value="sl"/>
+                         <param name="t1" value="2003-12-15" />
+                         <param name="t2" value="2004-12-15" />
+                     </conditional>
+                 </conditional>
+             </section>
+             <repeat name="user_choice">
+                 <param name="dim" value="nh4"/>
+                 <conditional name="condi_between">
+                    <param name="comparator" value="ge"/>
+                    <param name="value" value="50."/>
+                 </conditional>
+             </repeat>
+             <output name="simpleoutput" value="Test2.tabular">
+                 <assert_contents>
+                     <has_text_matching expression="0\t2003-12-15\t0.5"/>
+                     <has_text_matching expression="23\t2004-12-15\t0.5"/>
+                 </assert_contents>
+             </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+This tool extracts variable values with custom conditions on dimensions.
+
+It can use manually entered coordinates or automatically read them from a tabular file to filter the data.
+
+If no values are available at the requested coordinate, the tool searches for the closest coordinate with a non-NA value.
+
+Filters can be set on every dimension. Available filtering operations are: =, >=, <=, and the inclusive interval [threshold1, threshold2].
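+
+Each filter is passed to the underlying script as a ``dimension#comparator#value`` string and applied with xarray. As a rough sketch (file and variable names are illustrative)::
+
+    import xarray as xr
+    ds = xr.open_dataset('input.nc')            # illustrative file name
+    nh4 = ds['nh4']                             # the selected variable
+    nh4 = nh4.sel({'depth': slice(0.5, None)})  # 'depth#ge#0.5' -> slice on a dimension
+    nh4 = nh4.where(nh4 >= 50.)                 # 'nh4#ge#50.' -> mask on the variable itself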
+
+
+
+**Input**
+
+A netCDF file (.nc).
+
+A variable tabular file from 'Netcdf Metadata Info'.
+
+A tabular file with coordinates, one pair per line, with the structure: 'lat'	'lon'.
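+
+For instance, a small coordinates file might look like this (tab-separated, illustrative values; the first line is read as a header)::
+
+    lat	lon
+    44.0	-2.0
+    45.166	-0.666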
+
+
+**Outputs**
+
+A single tabular output with values for the selected variable, if there is only one coordinate.
+
+A data collection with one file per coordinate, if multiple coordinates are provided in a tabular file.
+
+
+-------------------------------------------------
+
+The xarray Selection tool can be used after the xarray Metadata Info tool.
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xarray_tool.py	Fri May 22 05:19:15 2020 -0400
@@ -0,0 +1,302 @@
+# xarray tool for:
+# - getting metadata information
+# - selecting data and saving the results in a tabular file for further post-processing
+
+import argparse
+import csv
+import warnings
+
+import geopandas as gdp
+
+import pandas as pd
+
+from shapely.geometry import Point
+from shapely.ops import nearest_points
+
+import xarray as xr
+
+
+class XarrayTool:
+    def __init__(self, infile, outfile_info="", outfile_summary="",
+                 select="", outfile="", outputdir="", latname="",
+                 latvalN="", latvalS="", lonname="", lonvalE="",
+                 lonvalW="", filter_list="", coords="", time="",
+                 verbose=False
+                 ):
+        self.infile = infile
+        self.outfile_info = outfile_info
+        self.outfile_summary = outfile_summary
+        self.select = select
+        self.outfile = outfile
+        self.outputdir = outputdir
+        self.latname = latname
+        if latvalN != "" and latvalN is not None:
+            self.latvalN = float(latvalN)
+        else:
+            self.latvalN = ""
+        if latvalS != "" and latvalS is not None:
+            self.latvalS = float(latvalS)
+        else:
+            self.latvalS = ""
+        self.lonname = lonname
+        if lonvalE != "" and lonvalE is not None:
+            self.lonvalE = float(lonvalE)
+        else:
+            self.lonvalE = ""
+        if lonvalW != "" and lonvalW is not None:
+            self.lonvalW = float(lonvalW)
+        else:
+            self.lonvalW = ""
+        self.filter = filter_list
+        self.time = time
+        self.coords = coords
+        self.verbose = verbose
+        # initialization
+        self.dset = None
+        self.gset = None
+        if self.verbose:
+            print("infile: ", self.infile)
+            print("outfile_info: ", self.outfile_info)
+            print("outfile_summary: ", self.outfile_summary)
+            print("outfile: ", self.outfile)
+            print("select: ", self.select)
+            print("outfile: ", self.outfile)
+            print("outputdir: ", self.outputdir)
+            print("latname: ", self.latname)
+            print("latvalN: ", self.latvalN)
+            print("latvalS: ", self.latvalS)
+            print("lonname: ", self.lonname)
+            print("lonvalE: ", self.lonvalE)
+            print("lonvalW: ", self.lonvalW)
+            print("filter: ", self.filter)
+            print("time: ", self.time)
+            print("coords: ", self.coords)
+
+    def info(self):
+        f = open(self.outfile_info, 'w')
+        ds = xr.open_dataset(self.infile)
+        ds.info(f)
+        f.close()
+
+    def summary(self):
+        f = open(self.outfile_summary, 'w')
+        ds = xr.open_dataset(self.infile)
+        writer = csv.writer(f, delimiter='\t')
+        header = ['VariableName', 'NumberOfDimensions']
+        for idx, val in enumerate(ds.dims.items()):
+            header.append('Dim'+str(idx)+'Name')
+            header.append('Dim'+str(idx)+'Size')
+        writer.writerow(header)
+        # one row per data variable, then one per coordinate variable:
+        # name, number of dimensions, then (dimension name, dimension size) pairs
+        for name, da in ds.data_vars.items():
+            line = [name]
+            line.append(len(da.shape))
+            for dim, size in da.sizes.items():
+                line.append(dim)
+                line.append(size)
+            writer.writerow(line)
+        for name, da in ds.coords.items():
+            line = [name]
+            line.append(len(da.shape))
+            for dim, size in da.sizes.items():
+                line.append(dim)
+                line.append(size)
+            writer.writerow(line)
+        f.close()
+
+    def rowfilter(self, single_filter):
+        split_filter = single_filter.split('#')
+        filter_varname = split_filter[0]
+        op = split_filter[1]
+        ll = float(split_filter[2])
+        if (op == 'bi'):
+            rl = float(split_filter[3])
+        if filter_varname == self.select:
+            # filter on values of the selected variable
+            if op == 'bi':
+                self.dset = self.dset.where((self.dset <= rl) &
+                                            (self.dset >= ll))
+            elif op == 'le':
+                self.dset = self.dset.where(self.dset <= ll)
+            elif op == 'ge':
+                self.dset = self.dset.where(self.dset >= ll)
+            elif op == 'e':
+                self.dset = self.dset.where(self.dset == ll)
+        else:  # filter on other dimensions of the selected variable
+            if op == 'bi':
+                self.dset = self.dset.sel({filter_varname: slice(ll, rl)})
+            elif op == 'le':
+                self.dset = self.dset.sel({filter_varname: slice(None, ll)})
+            elif op == 'ge':
+                self.dset = self.dset.sel({filter_varname: slice(ll, None)})
+            elif op == 'e':
+                self.dset = self.dset.sel({filter_varname: ll},
+                                          method='nearest')
+
+    def selection(self):
+        if self.dset is None:
+            self.ds = xr.open_dataset(self.infile)
+            self.dset = self.ds[self.select]  # select variable
+            if self.time:
+                self.datetime_selection()
+            if self.filter:
+                self.filter_selection()
+
+        self.area_selection()
+        # convert to dataframe
+        self.gset = self.gset.to_dataframe().dropna(how='all').reset_index()
+        self.gset.to_csv(self.outfile, header=True, sep='\t')
+
+    def datetime_selection(self):
+        split_filter = self.time.split('#')
+        time_varname = split_filter[0]
+        op = split_filter[1]
+        ll = split_filter[2]
+        if (op == 'sl'):
+            rl = split_filter[3]
+            self.dset = self.dset.sel({time_varname: slice(ll, rl)})
+        elif (op == 'to'):
+            self.dset = self.dset.sel({time_varname: slice(None, ll)})
+        elif (op == 'from'):
+            self.dset = self.dset.sel({time_varname: slice(ll, None)})
+        elif (op == 'is'):
+            self.dset = self.dset.sel({time_varname: ll}, method='nearest')
+
+    def filter_selection(self):
+        for single_filter in self.filter:
+            self.rowfilter(single_filter)
+
+    def area_selection(self):
+        if self.latvalS != "" and self.lonvalW != "":
+            # Select geographical area
+            self.gset = self.dset.sel({self.latname:
+                                       slice(self.latvalS, self.latvalN),
+                                       self.lonname:
+                                       slice(self.lonvalW, self.lonvalE)})
+        elif self.latvalN != "" and self.lonvalE != "":
+            # select nearest location
+            self.nearest_location()  # find nearest location without NaN values
+            self.gset = self.dset.sel({self.latname: self.nearest_latvalN,
+                                       self.lonname: self.nearest_lonvalE},
+                                      method='nearest')
+        else:
+            self.gset = self.dset
+
+    def nearest_location(self):
+        # Build a geopandas dataframe from the first element along every
+        # non-geographic dimension; we assume null values correspond to a
+        # mask that is the same across all dimensions of the dataset.
+        dsel_frame = self.dset
+        for dim in self.dset.dims:
+            if dim != self.latname and dim != self.lonname:
+                dsel_frame = dsel_frame.isel({dim: 0})
+        # transform to pandas dataframe
+        dff = dsel_frame.to_dataframe().dropna().reset_index()
+        # transform to geopandas to collocate
+        gdf = gdp.GeoDataFrame(dff,
+                               geometry=gdp.points_from_xy(dff[self.lonname],
+                                                           dff[self.latname]))
+        # Find nearest location where values are not null
+        point = Point(self.lonvalE, self.latvalN)
+        multipoint = gdf.geometry.unary_union
+        queried_geom, nearest_geom = nearest_points(point, multipoint)
+        self.nearest_latvalN = nearest_geom.y
+        self.nearest_lonvalE = nearest_geom.x
+
+    def selection_from_coords(self):
+        fcoords = pd.read_csv(self.coords, sep='\t')
+        for row in fcoords.itertuples():
+            # row[0] is the dataframe index; latitude and longitude are the
+            # first two columns of the coordinates file
+            self.latvalN = row[1]
+            self.lonvalE = row[2]
+            self.outfile = (self.outputdir + '/' + self.select + '_'
+                            + str(row.Index) + '.tabular')
+            self.selection()
+
+
+if __name__ == '__main__':
+    warnings.filterwarnings("ignore")
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        'infile',
+        help='netCDF input filename'
+    )
+    parser.add_argument(
+        '--info',
+        help='Output filename where metadata information is stored'
+    )
+    parser.add_argument(
+        '--summary',
+        help='Output filename where data summary information is stored'
+    )
+    parser.add_argument(
+        '--select',
+        help='Variable name to select'
+    )
+    parser.add_argument(
+        '--latname',
+        help='Latitude name'
+    )
+    parser.add_argument(
+        '--latvalN',
+        help='North latitude value'
+    )
+    parser.add_argument(
+        '--latvalS',
+        help='South latitude value'
+    )
+    parser.add_argument(
+        '--lonname',
+        help='Longitude name'
+    )
+    parser.add_argument(
+        '--lonvalE',
+        help='East longitude value'
+    )
+    parser.add_argument(
+        '--lonvalW',
+        help='West longitude value'
+    )
+    parser.add_argument(
+        '--coords',
+        help='Input file containing Latitude and Longitude '
+             'for geographical selection'
+    )
+    parser.add_argument(
+        '--filter',
+        nargs="*",
+        help='Filter list, each item of the form variable#operator#value_s[#value_e]'
+    )
+    parser.add_argument(
+        '--time',
+        help='select timeseries variable#operator#value_s[#value_e]'
+    )
+    parser.add_argument(
+        '--outfile',
+        help='Tabular output file for storing the results of the selection '
+             '(valid only with --select)'
+    )
+    parser.add_argument(
+        '--outputdir',
+        help='Output directory for storing results of multiple selections '
+             '(valid only with --select)'
+    )
+    parser.add_argument(
+        "-v", "--verbose",
+        help="switch on verbose mode",
+        action="store_true"
+    )
+    args = parser.parse_args()
+
+    p = XarrayTool(args.infile, args.info, args.summary, args.select,
+                   args.outfile, args.outputdir, args.latname,
+                   args.latvalN, args.latvalS, args.lonname,
+                   args.lonvalE, args.lonvalW, args.filter,
+                   args.coords, args.time, args.verbose)
+    if args.info:
+        p.info()
+    if args.summary:
+        p.summary()
+    if args.coords:
+        p.selection_from_coords()
+    elif args.select:
+        p.selection()