Mercurial > repos > itaxotools > latlon_converter
view latlon_conv.py @ 0:ef146e4077e6 draft default tip
planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
author | itaxotools |
---|---|
date | Sun, 29 Jan 2023 16:44:56 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python3
"""LatLonConverter: a batch converter of geographical coordinates.

Reads lines that contain a latitude/longitude pair in (almost) any common
notation and emits them in corrected, decimal and sexagesimal form.

Run with ``--cmd`` to convert stdin to stdout as tab-separated values;
run without arguments to open the Tk user interface.
"""
import codecs
import locale
import math
import os
import re
import sys
import warnings
from itertools import chain
from typing import Iterator, List, Optional, Tuple, Union

try:
    import tkinter as tk
    from tkinter import ttk
    import tkinter.filedialog as tkfiledialog
    import tkinter.messagebox
    import tkinter.font as tkfont
except ImportError:
    # Tk is only needed by launch_gui(); the --cmd mode and the parsing
    # functions must keep working on headless installations without it.
    tk = None  # type: ignore[assignment]

# the parsers' input type
Tokens = List[Tuple[int, str]]

# types of minutes: either a float or int with seconds
Minute = Union[float, Tuple[int, float]]

# type of coordinates: either a float or sign, degree and minutes
Coordinate = Union[float, Tuple[bool, int, Minute]]

# characters accepted as a decimal mark between two digit groups
_DECIMAL_MARKS = ('.', ',')

# patterns used by prepare_string (compiled once, applied in this order)
_SECONDS_RE = re.compile('seconds|sec|["“”‟]|[´`‘’‛][´`‘’‛]|[´`‘’‛] [´`‘’‛]')
_MINUTES_RE = re.compile('minutes|min|[´`‘’‛]')
_DEGREE_WORDS_RE = re.compile('degrees|deg')

# a run of digits (optionally signed) followed by everything up to the next
# number; the second group is the "separator" inspected by the parsers
_TOKEN_RE = re.compile(r'(-?\d+)([^\d-]*)')


class _Number(int):
    """An integer token that remembers the exact text it was read from.

    The plain ``int`` value loses information the parsers need: the leading
    zeros of a decimal fraction ("05" in "12.05") and the sign of "-0".
    Being an ``int`` subclass keeps the tokens compatible with ``Tokens``.
    """

    text: str

    def __new__(cls, text: str) -> "_Number":
        self = super().__new__(cls, text)
        self.text = text
        return self


def _digits(number: int) -> str:
    """Return the source text of a token number (``str(number)`` for plain ints)."""
    return getattr(number, "text", str(number))


def _is_negative(number: int) -> bool:
    """Tell whether a token number was written with a minus sign (also "-0")."""
    return _digits(number).startswith('-')


def dec_minute(minute: Minute) -> float:
    """Convert minutes (decimal, or whole minutes with seconds) to decimal minutes."""
    if isinstance(minute, float):
        return minute
    whole_minutes, seconds = minute
    return whole_minutes + seconds / 60


def dec_coord(coord: Coordinate) -> float:
    """Convert a coordinate to signed decimal degrees."""
    if isinstance(coord, float):
        return coord
    positive, degrees, minutes = coord
    magnitude = degrees + dec_minute(minutes) / 60
    return magnitude if positive else -magnitude


def sx_coord(coord: Coordinate) -> Tuple[bool, int, Tuple[int, float]]:
    """Convert a coordinate to sexagesimal form: (sign, degrees, (minutes, seconds))."""
    if isinstance(coord, float):
        positive = coord >= 0
        magnitude = abs(coord)
        degrees = math.floor(magnitude)
        return sx_coord((positive, degrees, (magnitude - degrees) * 60))
    positive, degrees, minutes = coord
    if isinstance(minutes, float):
        whole_minutes = math.floor(minutes)
        return (positive, degrees, (whole_minutes, (minutes - whole_minutes) * 60))
    return (positive, degrees, minutes)


def str_coord(coord: Coordinate, lat: bool) -> str:
    """Format a coordinate with its hemisphere letter.

    `lat` selects the letters: S/N for latitude, W/E for longitude.
    """
    negative_hem, positive_hem = ('S', 'N') if lat else ('W', 'E')
    if isinstance(coord, float):
        hem = positive_hem if coord >= 0 else negative_hem
        return f"{abs(coord):.5f}{hem}"
    positive, degrees, minutes = coord
    hem = positive_hem if positive else negative_hem
    if isinstance(minutes, float):
        return f"{degrees}°{minutes:.3f}'{hem}"
    return f"{degrees}°{minutes[0]}'{minutes[1]:.1f}''{hem}"


def signed_coord(coord: str) -> str:
    """Replace the trailing hemisphere letter of a formatted coordinate with a sign."""
    value, hem = coord[:-1], coord[-1]
    return '-' + value if hem in 'SW' else value


def prepare_string(string: str) -> str:
    """
    standardizes the string

    Hemisphere words become single letters, the various spellings and
    typographic quotes for seconds/minutes become '' and ', and the
    spellings of degrees (including a letter 'o') become '°'.

    raises ValueError if both 'O' or 'o' and '°' are present in the string
    """
    has_degree_sign = '°' in string
    string = string.casefold()
    for word, letter in (('north', 'n'), ('south', 's'),
                         ('west', 'w'), ('east', 'e')):
        string = string.replace(word, letter)
    # seconds must be normalized before minutes: a doubled quote is seconds
    string = _SECONDS_RE.sub("''", string)
    string = _MINUTES_RE.sub("'", string)
    string = _DEGREE_WORDS_RE.sub('°', string)
    # Only an 'o' that survives the word normalization can be a direction;
    # the letters inside "north", "south" or "seconds" must not trigger this.
    if has_degree_sign and 'o' in string:
        raise ValueError(
            "Encountered 'O' to indicate geographical direction which can mean either West "
            "(Spanish/French/Italian) or East (German); please change to E or W before conversion.")
    return string.replace('o', '°')


def parse_float(tokens: Tokens) -> Tuple[float, Tokens]:
    """
    parse a float and return the rest of the string

    The first token is the integer part and the second one the fraction;
    the digits are taken from the token text so that "12.05" is not read
    as 12.5.

    raise a ValueError("parse error") if the length is less than two
    """
    if len(tokens) < 2:
        raise ValueError("parse error")
    int_part, dec_part = tokens[0][0], tokens[1][0]
    return float(f"{_digits(int_part)}.{_digits(dec_part)}"), tokens[2:]


def parse_seconds(tokens: Tokens) -> Tuple[float, Tokens]:
    """
    parse a coordinate starting with seconds and return the rest of the string

    raises a ValueError("parse error"), if the parsing fails
    """
    if not tokens:
        raise ValueError("parse error")
    if len(tokens) == 1:
        return float(tokens[0][0]), tokens[1:]
    separator = tokens[0][1]
    # slicing (not indexing) so that an empty separator is a parse error
    if separator[:1] in _DECIMAL_MARKS:
        # floating point seconds
        return parse_float(tokens)
    if separator[:2] == "''":
        return float(tokens[0][0]), tokens[1:]
    raise ValueError("parse error")


def parse_minutes(tokens: Tokens) -> Tuple[Minute, Tokens]:
    """
    parse a coordinate starting with minutes and return the rest of the string

    raises a ValueError("parse error"), if the parsing fails
    """
    if not tokens:
        raise ValueError("parse error")
    if len(tokens) == 1:
        return float(tokens[0][0]), tokens[1:]
    lead = tokens[0][1][:1]
    if lead in _DECIMAL_MARKS:
        # floating point minutes
        return parse_float(tokens)
    if lead == "'":
        minutes = tokens[0][0]
        try:
            seconds, rest = parse_seconds(tokens[1:])
        except ValueError:
            # there are no seconds
            return float(minutes), tokens[1:]
        return (int(minutes), seconds), rest
    raise ValueError("parse error")


def parse_coord(tokens: Tokens) -> Tuple[Coordinate, Tokens]:
    """
    parse a single coordinate and return the rest of the string

    raises a ValueError("parse error") if the beginning doesn't match a coordinate
    """
    if not tokens:
        raise ValueError("parse error")
    if len(tokens) == 1:
        return float(tokens[0][0]), tokens[1:]
    lead = tokens[0][1][:1]
    if lead in _DECIMAL_MARKS:
        # floating point coordinate
        return parse_float(tokens)
    if lead == '°':
        degrees = tokens[0][0]
        try:
            minutes, rest = parse_minutes(tokens[1:])
        except ValueError:
            # there are no minutes
            return float(degrees), tokens[1:]
        # the sign comes from the text so that "-0°30'" stays negative
        return (not _is_negative(degrees), int(abs(degrees)), minutes), rest
    raise ValueError("parse error")


def hemisphere_sign(c: str, coord: Coordinate) -> Coordinate:
    """Negate `coord` unless the hemisphere letter `c` is 'n' or 'e'."""
    if c in 'ne':
        return coord
    if isinstance(coord, float):
        return -coord
    return (not coord[0], coord[1], coord[2])


def cannot_parse_error(tokens: Tokens) -> ValueError:
    """
    makes a ValueError "Cannot parse: tokens as str"
    """
    return ValueError(
        "Cannot parse: " + ''.join(_digits(n) + sep for n, sep in tokens))


def parse_coordinates(string: str, lat_first: bool) -> Tuple[Coordinate, Coordinate]:
    """
    parses a string into coordinates with latitude first

    lat_first indicates whether latitude is first in unmarked strings

    raises a ValueError if the string contains letters other than
    hemispheres, if the hemispheres do not name one latitude and one
    longitude, or if the string cannot be parsed completely (the error
    then shows the unparsed rest)
    """
    string = prepare_string(string)

    # extract the quadrant information
    letters = [c for c in string if c.isalpha()]
    quadrant = [c for c in letters if c in 'nsew']
    if len(letters) > len(quadrant):
        unknown = ', '.join(sorted(set(letters) - set(quadrant)))
        raise ValueError(
            f"Letters {unknown} cannot be recognized as hemispheres")

    # decide whether the pair must be exchanged to put latitude first
    if not quadrant:
        signed, swapped = False, not lat_first
    elif len(quadrant) == 2 and quadrant[0] in 'ns' and quadrant[1] in 'we':
        signed, swapped = True, False
    elif len(quadrant) == 2 and quadrant[0] in 'we' and quadrant[1] in 'ns':
        signed, swapped = True, True
    else:
        # a single letter, more than two, or two letters of the same axis
        raise ValueError(
            f"Cannot recognize the order of coordinates: {string}")

    def orient(p: Tuple[Coordinate, Coordinate]
               ) -> Tuple[Coordinate, Coordinate]:
        """apply the hemisphere signs and put latitude first"""
        first, second = p
        if signed:
            first = hemisphere_sign(quadrant[0], first)
            second = hemisphere_sign(quadrant[1], second)
        return (second, first) if swapped else (first, second)

    # split the string into tokens
    tokens: Tokens = [(_Number(m.group(1)), m.group(2))
                      for m in _TOKEN_RE.finditer(string)]

    # parse coordinates one after the other
    try:
        coord0, tokens1 = parse_coord(tokens)
    except ValueError as ex:
        raise cannot_parse_error(tokens) from ex
    if not tokens1 and len(tokens) == 2:
        # probably the degrees, degrees situation
        return orient((float(tokens[0][0]), float(tokens[1][0])))
    try:
        coord1, rest = parse_coord(tokens1)
    except ValueError as ex:
        raise cannot_parse_error(tokens1) from ex
    if rest:
        # incomplete parse: error
        raise cannot_parse_error(rest)
    return orient((coord0, coord1))


def validate_coord(coord: Coordinate, direction: str) -> str:
    """
    direction is "latitude" or "longitude"

    Returns a message if the degrees are out of range (90 for latitude,
    180 for longitude, either sign) or if minutes or seconds are invalid
    (not in [0, 60))
    Otherwise returns an empty string
    """
    limit = 90 if direction == "latitude" else 180
    bad_degrees = f"Invalid degrees in {direction}"
    if isinstance(coord, float):
        # decimal coordinates carry their sign, so compare the magnitude
        return "" if abs(coord) <= limit else bad_degrees
    _, degrees, minutes = coord
    if not 0 <= degrees <= limit:
        return bad_degrees
    if isinstance(minutes, float):
        if not 0 <= minutes < 60:
            return f"Invalid minutes in {direction}"
    else:
        whole_minutes, seconds = minutes
        if not 0 <= whole_minutes < 60:
            return f"Invalid minutes in {direction}"
        if not 0 <= seconds < 60:
            return f"Invalid seconds in {direction}"
    if abs(dec_coord(coord)) > limit:
        # e.g. 90°30' of latitude
        return bad_degrees
    return ""


def process_simpl(input: Iterator[str]) -> Iterator[List[str]]:
    """Convert lines of coordinates into rows of output columns.

    If the first line contains both 'lat' and 'lon' it is taken as a
    heading that tells which coordinate comes first; otherwise latitude is
    assumed to be first and the line is treated as data.

    Yields the heading row followed by one 12-column row per non-blank
    line.  Lines that cannot be parsed or validated keep their original
    text and carry the message in the last ("Remark") column.  Nothing is
    yielded when there is no data line.
    """
    input = iter(input)
    # by default latitude comes first
    lat_first = True

    try:
        line = next(input)
    except StopIteration:
        return
    heading = line.casefold()
    lat_ind = heading.find('lat')
    lon_ind = heading.find('lon')
    if lat_ind >= 0 and lon_ind >= 0:
        # the first line is a heading
        lat_first = lat_ind <= lon_ind
        try:
            line = next(input)
        except StopIteration:
            return

    both = "latlon" if lat_first else "lonlat"
    yield ["original_lat", "original_lon", f"original_{both}",
           "lat_corr", "lon_corr",
           "lat_dec", "lon_dec", "latlon_dec",
           "lat_sx", "lon_sx", "latlon_sx",
           "Remark"]

    for line in chain([line], input):
        line = line.strip()
        if not line:
            continue

        # the part of the output with the original information
        part1, _, part2 = line.partition('\t')
        if not part1.strip() or not part2.strip():
            original = ["", "", line]
        elif lat_first:
            original = [part1, part2, ""]
        else:
            original = [part2, part1, ""]

        # if parsing or validation fails, output just the original
        try:
            lat, lon = parse_coordinates(line, lat_first)
        except ValueError as ex:
            yield original + [""] * 8 + [str(ex)]
            continue
        remark = validate_coord(lat, "latitude") + \
            validate_coord(lon, "longitude")
        if remark:
            yield original + [""] * 8 + [remark]
            continue

        lat_corr = str_coord(lat, True)
        lon_corr = str_coord(lon, False)
        lat_dec = str_coord(dec_coord(lat), True)
        lon_dec = str_coord(dec_coord(lon), False)
        lat_sx = str_coord(sx_coord(lat), True)
        lon_sx = str_coord(sx_coord(lon), False)
        yield original + [lat_corr, lon_corr,
                          signed_coord(lat_dec), signed_coord(lon_dec),
                          f"{lat_dec} {lon_dec}",
                          lat_sx, lon_sx, f"{lat_sx} {lon_sx}",
                          ""]


def _text_file_encoding() -> str:
    """Return the codec used for the input and output files.

    "ansi" is an alias that Python registers on Windows only; elsewhere it
    raises LookupError, so fall back to the locale's preferred encoding.
    """
    try:
        codecs.lookup("ansi")
    except LookupError:
        return locale.getpreferredencoding(False)
    return "ansi"


def _display_path(path: str) -> str:
    """Return `path` relative to the working directory when that is possible."""
    try:
        return os.path.relpath(path)
    except ValueError:
        # on Windows there is no relative path across drives
        return os.path.abspath(path)


def launch_gui() -> None:
    """Build the Tk window and run its main loop.

    Raises RuntimeError when tkinter could not be imported.
    """
    if tk is None:
        raise RuntimeError(
            "tkinter is not available; use --cmd to convert stdin to stdout")

    # initialization
    root = tk.Tk()
    root.title("LatLonConverter")
    if os.name == "nt":
        root.wm_iconbitmap(os.path.join('data', 'LatLonIcon.ico'))
    mainframe = ttk.Frame(root, padding=5)
    root.rowconfigure(1, weight=1)
    root.columnconfigure(0, weight=1)
    mainframe.rowconfigure(4, weight=1)
    mainframe.columnconfigure(2, weight=1)

    style = ttk.Style()
    style.configure("ConvertButton.TButton", background="blue")

    # banner frame
    banner_frame = ttk.Frame(root)
    # keep a reference in a local: Tk does not keep the image alive itself
    banner_img = tk.PhotoImage(file=os.path.join(
        "data", "iTaxoTools Digital linneaeus MICROLOGO.png"))
    banner_image = ttk.Label(banner_frame, image=banner_img)
    banner_image.grid(row=0, column=0, rowspan=2, sticky='nsw')
    program_name = ttk.Label(
        banner_frame, text="LatLonConverter", font=tkfont.Font(size=20))
    program_name.grid(row=1, column=1, sticky='sw')
    program_description = ttk.Label(
        banner_frame, text="A batch converter of geographical coordinates")
    program_description.grid(row=1, column=2, sticky='sw', ipady=4, ipadx=15)
    banner_frame.grid(column=0, row=0, sticky='nsw')

    # create labels
    infile_lbl = ttk.Label(mainframe, text="Input file")
    outfile_lbl = ttk.Label(mainframe, text="Output file")

    # create entries
    infile_var = tk.StringVar()
    infile_entr = ttk.Entry(mainframe, textvariable=infile_var)
    outfile_var = tk.StringVar()
    outfile_entr = ttk.Entry(mainframe, textvariable=outfile_var)

    # create texts
    input_frame = ttk.Frame(mainframe)
    input_frame.rowconfigure(1, weight=1)
    input_frame.columnconfigure(0, weight=1)
    input_text = tk.Text(input_frame, width=50, height=15, undo=True)
    input_lbl = ttk.Label(input_frame, text="Paste coordinates here for fast conversion into decimal format\n(one pair of coordinates per line, in any format)")
    input_xscroll = ttk.Scrollbar(
        input_frame, orient=tk.HORIZONTAL, command=input_text.xview)
    input_yscroll = ttk.Scrollbar(
        input_frame, orient=tk.VERTICAL, command=input_text.yview)
    input_text.configure(xscrollcommand=input_xscroll.set,
                         yscrollcommand=input_yscroll.set)
    input_lbl.grid(row=0, column=0, sticky='w')
    input_text.grid(row=1, column=0, sticky='nsew')
    input_xscroll.grid(row=2, column=0, sticky='nsew')
    input_yscroll.grid(row=1, column=1, sticky='nsew')

    output_frame = ttk.Frame(mainframe)
    output_frame.rowconfigure(1, weight=1)
    output_frame.columnconfigure(0, weight=1)
    output_text = tk.Text(output_frame, width=50, height=15, wrap='none')
    output_lbl = ttk.Label(output_frame, text="If the data have been pasted into the window on the left,\nthe converted output will be show here.")
    output_xscroll = ttk.Scrollbar(
        output_frame, orient=tk.HORIZONTAL, command=output_text.xview)
    output_yscroll = ttk.Scrollbar(
        output_frame, orient=tk.VERTICAL, command=output_text.yview)
    output_text.configure(xscrollcommand=output_xscroll.set,
                          yscrollcommand=output_yscroll.set)
    output_text.configure(state='disabled')
    # make sure the widget gets focus when clicked
    # on, to enable highlighting and copying to the
    # clipboard.
    output_text.bind("<1>", lambda _: output_text.focus_set())
    output_lbl.grid(row=0, column=0, sticky='w')
    output_text.grid(row=1, column=0, sticky='nsew')
    output_xscroll.grid(row=2, column=0, sticky='nsew')
    output_yscroll.grid(row=1, column=1, sticky='nsew')

    # internal functions
    def input_lines() -> Iterator[str]:
        """
        returns an iterator over the input lines

        if the input file name is given, the line comes from it,
        otherwise from the input text widget
        """
        filename = infile_var.get()
        if filename.strip():
            with open(filename, encoding=_text_file_encoding(),
                      errors='replace') as file:
                yield from file
        else:
            yield from input_text.get('1.0', 'end').splitlines()

    def write_output(lines: Iterator[List[str]]) -> None:
        """
        writes the output

        if the output file name is given, the output is written to it,
        otherwise to the output text widget
        """
        filename = outfile_var.get()
        output_text.configure(state='normal')
        try:
            output_text.delete('1.0', 'end')
            if filename.strip():
                with open(filename, encoding=_text_file_encoding(),
                          mode='w') as file:
                    for line in lines:
                        print("\t".join(line), file=file)
            else:
                # the widget shows only the decimal pair and the remark
                for line in lines:
                    output_text.insert(
                        'end', f"{line[5]}\t{line[6]}\t{line[-1]}\n")
        finally:
            # never leave the widget editable, even if the conversion failed
            output_text.configure(state='disabled')

    def browse_infile() -> None:
        newpath: Optional[str] = tkfiledialog.askopenfilename()
        if newpath:
            infile_var.set(_display_path(newpath))

    def browse_outfile() -> None:
        newpath: Optional[str] = tkfiledialog.asksaveasfilename()
        if newpath:
            outfile_var.set(_display_path(newpath))

    def process() -> None:
        """
        command for the Process button
        """
        try:
            # catch all warnings
            with warnings.catch_warnings(record=True) as warns:
                write_output(process_simpl(input_lines()))
            # display the warnings generated during the conversion
            for w in warns:
                tkinter.messagebox.showwarning("Warning", str(w.message))
            # notify the user that the conversion is finished
            tkinter.messagebox.showinfo(
                "Done.", "The processing has been completed")
        except (ValueError, OSError) as ex:
            # OSError covers missing files as well as unreadable/unwritable ones
            tkinter.messagebox.showerror("Error", str(ex))

    def load() -> None:
        """
        loads the text from the input file into the input text widget
        """
        filename = infile_var.get()
        content = ""
        if filename.strip():
            try:
                with open(filename, encoding="utf8", errors='replace') as file:
                    content = file.read()
            except OSError as ex:
                tkinter.messagebox.showerror("Error", str(ex))
                return
        input_text.delete('1.0', 'end')
        input_text.insert('end', content)

    # create buttons
    infile_btn = ttk.Button(mainframe, text="Browse", command=browse_infile)
    outfile_btn = ttk.Button(mainframe, text="Browse", command=browse_outfile)
    load_btn = ttk.Button(mainframe, text="Load", command=load)
    process_btn = ttk.Button(mainframe, text="Convert", command=process,
                             style="ConvertButton.TButton")

    # display the widgets
    infile_lbl.grid(row=0, column=0, sticky='w')
    infile_entr.grid(row=1, column=0, sticky='we')
    infile_btn.grid(row=1, column=1, sticky='w')

    outfile_lbl.grid(row=0, column=3, sticky='w')
    outfile_entr.grid(row=1, column=3, sticky='we')
    outfile_btn.grid(row=1, column=4, sticky='w')

    load_btn.grid(row=2, column=0)
    process_btn.grid(row=2, column=2)

    ttk.Separator(mainframe, orient='horizontal').grid(
        row=3, column=0, columnspan=5, sticky='nsew', pady=20)

    input_frame.grid(row=4, column=0, columnspan=2)
    output_frame.grid(row=4, column=3, columnspan=2)

    ttk.Separator(root, orient='horizontal').grid(
        row=1, column=0, sticky='nsew')

    mainframe.grid(row=2, column=0, sticky='nsew')

    root.mainloop()


def main() -> None:
    """Convert stdin to stdout when --cmd is given, otherwise open the GUI."""
    if '--cmd' in sys.argv:
        for line in process_simpl(sys.stdin):
            print('\t'.join(line))
    else:
        launch_gui()


# Guarded so that importing the module (e.g. to reuse the parsers) neither
# opens a window nor consumes stdin.
if __name__ == "__main__":
    main()