# HG changeset patch # User mnhn65mo # Date 1533216278 14400 # Node ID 8da8ec7da45f9b50d95047c4f36359d3f83175fe Uploaded diff -r 000000000000 -r 8da8ec7da45f netcdf_metadata_info.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/netcdf_metadata_info.xml Thu Aug 02 09:24:38 2018 -0400 @@ -0,0 +1,77 @@ + + summarize content of a nc file + + netcdf-metadata-info + + dimensions_\$a + ;done \$f.tabular ; done + && + for f in dimensions_*.tabular;do + awk 'NR % 2 != 0' \$f > \$f.2 + && + sed 1d \$f.2 > \$f + && + rm \$f.2 + ;done + && + ncdump -h '$input' > '$info' + ]]> + + + + + + + + + + + + diff -r 000000000000 -r 8da8ec7da45f netcdf_read.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/netcdf_read.py Thu Aug 02 09:24:38 2018 -0400 @@ -0,0 +1,421 @@ +import netCDF4 +from netCDF4 import Dataset +import numpy as np +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +from pylab import * +import sys +import os +from scipy import spatial +from math import radians, cos, sin, asin, sqrt +import itertools + +##################### +##################### + +def checklist(dim_list, dim_name, filtre, threshold): + if not dim_list: + error="Error "+str(dim_name)+" has no value "+str(filtre)+" "+str(threshold) + sys.exit(error) + + +#Return dist in km between two coord +#Thx to : https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points +def haversine(lon1, lat1, lon2, lat2): + """ + Calculate the great circle distance between two points + on the earth (specified in decimal degrees) + """ + # convert decimal degrees to radians + lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) + + # haversine formula + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 + c = 2 * asin(sqrt(a)) + r = 6371 # Radius of earth in kilometers. Use 3956 for miles + return c * r + + +#Comparison functions, return a list of indexes for the user conditions +def is_strict_inf(filename, dim_name, threshold): + list_dim=[] + for i in range(0,filename.variables[dim_name].size): + if filename.variables[dim_name][i] < threshold: + list_dim.append(i) + checklist(list_dim,dim_name,"<",threshold) + return list_dim + +def is_equal_inf(filename, dim_name, threshold): + list_dim=[] + for i in range(0,filename.variables[dim_name].size): + if filename.variables[dim_name][i] <= threshold: + list_dim.append(i) + checklist(list_dim,dim_name,"<=",threshold) + return list_dim + +def is_equal_sup(filename, dim_name, threshold): + list_dim=[] + for i in range(0,filename.variables[dim_name].size): + if filename.variables[dim_name][i] >= threshold: + list_dim.append(i) + checklist(list_dim,dim_name,">=",threshold) + return list_dim + +def is_strict_sup(filename, dim_name, threshold): + list_dim=[] + for i in range(0,filename.variables[dim_name].size): + if filename.variables[dim_name][i] > threshold: + list_dim.append(i) + checklist(list_dim,dim_name,">",threshold) + return list_dim + +def find_nearest(array,value): + index = (np.abs(array-value)).argmin() + return index + +def is_equal(filename, dim_name, value): + try: + index=filename.variables[dim_name][:].tolist().index(value) + except: + index=find_nearest(filename.variables[dim_name][:],value) + return index + +def is_between_include(filename, dim_name, threshold1, threshold2): + list_dim=[] + for i in range(0,filename.variables[dim_name].size): + if filename.variables[dim_name][i] >= threshold1 and filename.variables[dim_name][i] <= threshold2: + list_dim.append(i) + checklist(list_dim,dim_name,">=",threshold1) + checklist(list_dim,dim_name,"=<",threshold2) + return list_dim + +def is_between_exclude(filename, dim_name, threshold1, threshold2): + list_dim=[] + for i in range(0,filename.variables[dim_name].size): + if filename.variables[dim_name][i] > threshold1 and filename.variables[dim_name][i] < threshold2: + list_dim.append(i) + checklist(list_dim,dim_name,">",threshold1) + checklist(list_dim,dim_name,"<",threshold2) + return list_dim + +####################### +####################### + +#Get args +#Get Input file +inputfile=Dataset(sys.argv[1]) +var_file_tab=sys.argv[2] +var=sys.argv[3] #Var chosen by user + +Coord_bool=False + + +###################### +###################### +#len_threshold=1000000 +len_threshold=7000 +x_percent=0.75 +threshold_latlon=100 + + +#Check if coord is passed as parameter +arg_n=len(sys.argv)-1 +if(((arg_n-3)%3)!=0): + Coord_bool=True #Useful to get closest coord + arg_n=arg_n-4 #Number of arg minus lat & lon + name_dim_lat=str(sys.argv[-4]) + name_dim_lon=str(sys.argv[-2]) + value_dim_lat=float(sys.argv[-3]) + value_dim_lon=float(sys.argv[-1]) + + #Get all lat & lon + #try: + if True: + latitude=np.ma.MaskedArray(inputfile.variables[name_dim_lat]) + longitude=np.ma.MaskedArray(inputfile.variables[name_dim_lon]) + lat=latitude;lon=longitude #Usefull to keep the originals lat/lon vect before potentially resize it bellow. + len_all_coord=len(lat)*len(lon) + + #print("len all coord "+str(len_all_coord)+" threshold "+str(len_threshold)) + + #To avoid case when all_coord is to big and need to much memory + #If the vector is too big, reduce it to its third in a loop until its < to the threshold + while len_all_coord > len_threshold: + + if len(lat)> than lat. This way only lon is reduce and not lat. + x_percent_len_lat=99999999 + else: + x_percent_len_lat=int(x_percent*len(lat)) + + if len(lon)> than lon. This way only lat is reduce and not lon. + x_percent_len_lon=99999999 + else: + x_percent_len_lon=int(x_percent*len(lon)) + + #print("len(lat) :"+str(len(lat))+" x_percent_len_lat "+str(x_percent_len_lat)) + #print("len(lon) :"+str(len(lon))+" x_percent_len_lon "+str(x_percent_len_lon)) + + + pos_lat_user=find_nearest(lat,value_dim_lat) + pos_lon_user=find_nearest(lon,value_dim_lon) + + + #This part is to avoid having a vector that start bellow 0 + lat_reduced=int(pos_lat_user-x_percent_len_lat/2-1) + if lat_reduced<0: + lat_reduced=0 + lon_reduced=int(pos_lon_user-x_percent_len_lon/2-1) + if lon_reduced<0: + lon_reduced=0 + #Opposite here to avoid having vector with len > to len(vector) + lat_extended=int(pos_lat_user+x_percent_len_lat/2-1) + if lat_extended>len(lat): + lat_extended=len(lat) + lon_extended=int(pos_lon_user+x_percent_len_lon/2-1) + if lon_extended>len(lon): + lon_extended=len(lon) + + lat=lat[lat_reduced:lat_extended] #add a test to check if pos_lat_user-x_percent_len_lat/2-1 >0 + lon=lon[lon_reduced:lon_extended] + #print("latreduced : "+str(lat_reduced)+" latextended "+str(lat_extended)) + #print("lonreduced : "+str(lon_reduced)+" lonextended "+str(lon_extended)) + #print("lat : "+str(lat)) + #print("lon : "+str(lon)) + len_all_coord=len(lat)*len(lon) + + #print ("len_all_coord : "+str(len_all_coord)+". len_lat : "+str(len(lat))+" .len_lon : "+str(len(lon))) + + else: + #except: + sys.exit("Latitude & Longitude not found") + + #Set all lat-lon pair avaible in list_coord + list_coord_dispo=[] + for i in lat: + for j in lon: + list_coord_dispo.append(i);list_coord_dispo.append(j) + + #Reshape + all_coord=np.reshape(list_coord_dispo,(lat.size*lon.size,2)) + #np.set_printoptions(threshold='nan')#to print full vec + #print(str(all_coord)) + noval=True + + + +######################### +######################### + + +#Get the file of variables and number of dims : var.tab +var_file=open(var_file_tab,"r") #read +lines=var_file.readlines() #line +dim_names=[] +for line in lines: #for every lines + words=line.split() + if (words[0]==var): #When line match user input var + varndim=int(words[1]) #Get number of dim for the var + for dim in range(2,varndim*2+2,2): #Get dim names + dim_names.append(words[dim]) + #print ("Chosen var : "+sys.argv[3]+". Number of dimensions : "+str(varndim)+". Dimensions : "+str(dim_names)) #Standard msg + + +######################## +######################## + + +#Use a dictionary to save every lists of indexes +my_dic={} ##d["string{0}".format(x)] + +for i in range(4,arg_n,3): + #print("\nDimension name : "+sys.argv[i]+" action : "+sys.argv[i+1]+" .Value : "+sys.argv[i+2]+"\n") #Standard msg + + #Check if the dim selected for filtering is present in the var dimensions. + if (sys.argv[i] not in dim_names): + print("Warning ! "+sys.argv[i]+" is not a dimension of "+var+".\nThis filter will be skipped\nCheck in the file \"variables\" the dimensions available.\n\n") + pass + + my_dic["string{0}".format(i)]="list_index_dim" + my_dic_index="list_index_dim"+str(sys.argv[i]) #Possible improvement: Check if lon/lat are not parsed again + + #Apply every user filter. Call function and return list of index wich validate condition for every dim. + if (sys.argv[i+1]=="l"): #< + my_dic[my_dic_index]=is_strict_inf(inputfile, sys.argv[i], float(sys.argv[i+2])) + if (sys.argv[i+1]=="le"): #<= + my_dic[my_dic_index]=is_equal_inf(inputfile, sys.argv[i], float(sys.argv[i+2])) + if (sys.argv[i+1]=="g"): #> + my_dic[my_dic_index]=is_strict_sup(inputfile, sys.argv[i], float(sys.argv[i+2])) + if (sys.argv[i+1]=="ge"): #>= + my_dic[my_dic_index]=is_equal_sup(inputfile, sys.argv[i], float(sys.argv[i+2])) + if (sys.argv[i+1]=="e"): #== + my_dic[my_dic_index]=is_equal(inputfile, sys.argv[i], float(sys.argv[i+2])) + if (sys.argv[i+1]==":"): #all + my_dic[my_dic_index]=np.arange(inputfile.variables[sys.argv[i]].size) + if (sys.argv[i+1]=="be"): #between_exclude + #Get the 2 thresholds from the arg which looks like "threshold1-threshold2" + threshold1=sys.argv[i+2].split("-")[0] + threshold2=sys.argv[i+2].split("-")[1] + my_dic[my_dic_index]=is_between_exclude(inputfile, sys.argv[i], float(threshold1), float(threshold2)) + if (sys.argv[i+1]=="bi"): #between_include + #Get the 2 thresholds from the arg which looks like "threshold1-threshold2" + threshold1=sys.argv[i+2].split("-")[0] + threshold2=sys.argv[i+2].split("-")[1] + my_dic[my_dic_index]=is_between_include(inputfile, sys.argv[i], float(threshold1), float(threshold2)) + +##################### +##################### + + +#If precise coord given. +if Coord_bool: + while noval: #While no closest coord with valid values is found + #Return closest coord avaible + tree=spatial.KDTree(all_coord) + closest_coord=(tree.query([(value_dim_lat,value_dim_lon)])) + cc_index=closest_coord[1] + + closest_lat=float(all_coord[closest_coord[1]][0][0]) + closest_lon=float(all_coord[closest_coord[1]][0][1]) + + #Get coord index into dictionary + my_dic_index="list_index_dim"+str(name_dim_lat) + my_dic[my_dic_index]=latitude.tolist().index(closest_lat) + + my_dic_index="list_index_dim"+str(name_dim_lon) + my_dic[my_dic_index]=longitude.tolist().index(closest_lon) + + + #All dictionary are saved in the string exec2 which will be exec(). Value got are in vec2 + exec2="vec2=inputfile.variables['"+var+"'][" + first=True + for i in dim_names: #Every dim are in the right order + if not first: + exec2=exec2+"," + dimension_indexes="my_dic[\"list_index_dim"+i+"\"]" #new dim, custom name dic + try: #If some error or no specific user choices; every indexes are used for the selected dim. + exec(dimension_indexes) + except: + dimension_indexes=":" + exec2=exec2+dimension_indexes #Concatenate dim + first=False #Not the first element now + exec2=exec2+"]" + #print exec2 #To check integrity of the string + exec(exec2) #Execution, value are in vec2. + #print vec2 #Get the value, standard output + + #Check integrity of vec2. We don't want NA values + i=0 + #Check every value, if at least one non NA is found vec2 and the current closest coords are validated + vecsize=vec2.size + #print (str(vecsize)) + if vecsize>1: + while i1: + for s in range(0,size_dim): + b.append(inputfile[i][my_dic['list_index_dim'+i][s]]) + #print (i,inputfile[i][my_dic['list_index_dim'+i][s]]) + else: + b.append(inputfile[i][my_dic['list_index_dim'+i]]) + #print (i,inputfile[i][my_dic['list_index_dim'+i]]) + + a.append(b) + fo.write(i+"\t") +if Coord_bool: + fo.write("input_lat\t"+"input_lon\t") +fo.write(var+"\n") +fo.close() + + +###################### +###################### + + +#Write header in file +fo=open("header",'w') +for combination in itertools.product(*a): + if Coord_bool: + fo.write(str(combination)+"_"+str(value_dim_lat)+"_"+str(value_dim_lon)+"\t") + else: + fo.write(str(combination)+"\t") +fo.write("\n") +fo.close() + + +#Write vec2 in a tabular formated file +fo=open("sortie.tabular",'w') +#print(str(vec2)) +try: + vec2.tofile(fo,sep="\t",format="%s") +except: + vec3=np.ma.filled(vec2,np.nan) + vec3.tofile(fo,sep="\t",format="%s") +fo.close() + + +###################### +###################### + + +#Final sweet msg +print (var+" values successffuly extracted from "+sys.argv[1]+" !") diff -r 000000000000 -r 8da8ec7da45f netcdf_read.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/netcdf_read.xml Thu Aug 02 09:24:38 2018 -0400 @@ -0,0 +1,256 @@ + + extracts variable values with custom conditions on dimensions + + matplotlib + netCDF4 + scipy + datamash + + 'header_cleaned' + && + cat 'header_cleaned' 'sortie.tabular' > 'supersortie.tabular' + && + datamash transpose < 'supersortie.tabular' > 'supersortie_transposed.tabular' + && + sed -i 's/_/\t/g' 'supersortie_transposed.tabular' + && + cat 'header_names' 'supersortie_transposed.tabular' | sed 's/\s/\t/g' > 'output_dir/coord'\$i'.tabular'; + done<'$coord_tabular' + + #else + + python '$__tool_directory__/netcdf_read.py' '$input' '$var_tab' $var + #for $i,$uc in enumerate($user_choice) + #if $uc.condi_between.comparator=="bi" + ${uc.dim} ${uc.condi_between.comparator} ${uc.condi_between.t1}-${uc.condi_between.t2} + #elif $uc.condi_between.comparator=="be" + ${uc.dim} ${uc.condi_between.comparator} ${uc.condi_between.t1}-${uc.condi_between.t2} + #else + ${uc.dim} ${uc.condi_between.comparator} ${uc.condi_between.value} + #end if + #end for + #if $condi_source_coord.condi_coord.coord=='yes_cust_coord' + $condi_source_coord.condi_coord.lat_dim $condi_source_coord.condi_coord.lat_val $condi_source_coord.condi_coord.lon_dim $condi_source_coord.condi_coord.lon_val + #end if + && + cat 'header' | sed 's/array(\[//g' | sed 's/], dtype=float32)//g'| sed 's/,\s/_/g' | sed 's/(//g' | sed 's/)//g' > 'header_cleaned' + && + cat 'header_cleaned' 'sortie.tabular' > 'supersortie.tabular' + && + datamash transpose < 'supersortie.tabular' > 'supersortie_transposed.tabular' + && + sed -i 's/_/\t/g' 'supersortie_transposed.tabular' + && + cat 'header_names' 'supersortie_transposed.tabular' | sed 's/\s/\t/g' > 'final.tabular' + + #end if + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + condi_source_coord['coord_source'] == 'coord_from_file' + + + condi_source_coord['coord_source'] == 'coord_from_stdin' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + , <, >=, <=, [interval], ]interval[. + + + +**Input** + +A netcdf file (.nc). + +Variable tabular file from 'Netcdf Metadate Info'. + +Tabular file with coordinates and the following structure : 'lat' 'lon'. + + +**Outputs** + +A single output with values for the wanted variable if there is only one coordinate. + +A data collection where one file is created for every coordinate, if multiple coordinates from tabular file. + + +------------------------------------------------- + +The Netcdf Read tool can be used after the Netcdf Info. + ]]> +