diff latlon_conv.py @ 0:ef146e4077e6 draft default tip

planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
author itaxotools
date Sun, 29 Jan 2023 16:44:56 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/latlon_conv.py	Sun Jan 29 16:44:56 2023 +0000
@@ -0,0 +1,575 @@
+#!/usr/bin/env python3
+import re
+import os
+import math
+import sys
+from typing import List, Tuple, Union, Iterator, Optional
+import tkinter as tk
+from tkinter import ttk
+import tkinter.filedialog as tkfiledialog
+import tkinter.messagebox
+import tkinter.font as tkfont
+import warnings
+
+# the parsers' input type: a list of (number, separator) pairs, where the
+# separator is the text that follows the number up to the next number
+Tokens = List[Tuple[int, str]]
+# type of minutes: either a float (decimal minutes)
+# or a pair of whole minutes and seconds
+Minute = Union[float, Tuple[int, float]]
+# type of coordinates: either a float (signed decimal degrees)
+# or a triple of sign, degrees and minutes; the sign is True for N and E
+Coordinate = Union[float, Tuple[bool, int, Minute]]
+
+
+def dec_minute(minute: Minute) -> float:
+    """
+    converts minutes to decimal minutes
+
+    a float is passed through unchanged,
+    a (minutes, seconds) pair becomes minutes + seconds / 60
+    """
+    if not isinstance(minute, float):
+        whole_minutes, seconds = minute[0], minute[1]
+        return whole_minutes + seconds / 60
+    return minute
+
+
+def dec_coord(coord: Coordinate) -> float:
+    """
+    converts a coordinate to signed decimal degrees
+
+    a float is passed through unchanged,
+    a (sign, degrees, minutes) triple becomes degrees + minutes / 60,
+    negated when the sign is False
+    """
+    if isinstance(coord, float):
+        return coord
+    positive, degrees, minutes = coord[0], coord[1], coord[2]
+    magnitude = degrees + dec_minute(minutes) / 60
+    return magnitude if positive else -magnitude
+
+
+def sx_coord(coord: Coordinate) -> Tuple[bool, int, Tuple[int, float]]:
+    """
+    converts a coordinate to sign, degrees, minutes and seconds
+
+    decimal degrees are first split into whole degrees and decimal minutes,
+    decimal minutes are then split into whole minutes and seconds;
+    a coordinate that already has seconds is returned as it is
+    """
+    if isinstance(coord, float):
+        magnitude = abs(coord)
+        whole_degrees = math.floor(magnitude)
+        coord = (coord >= 0, whole_degrees, (magnitude - whole_degrees) * 60)
+    positive, degrees, minutes = coord
+    if not isinstance(minutes, float):
+        return (positive, degrees, minutes)
+    whole_minutes = math.floor(minutes)
+    return (positive, degrees, (whole_minutes, (minutes - whole_minutes) * 60))
+
+
+def str_coord(coord: Coordinate, lat: bool) -> str:
+    """
+    formats a coordinate followed by the letter of its hemisphere
+
+    lat selects between N/S (latitude) and E/W (longitude);
+    decimal degrees are written with 5 decimal places, decimal minutes
+    with 3 and seconds with 1
+    """
+    negative_hem, positive_hem = ('S', 'N') if lat else ('W', 'E')
+    if isinstance(coord, float):
+        hem = positive_hem if coord >= 0 else negative_hem
+        return f"{abs(coord):.5f}{hem}"
+    sign, degrees, minutes = coord
+    hem = positive_hem if sign else negative_hem
+    if isinstance(minutes, float):
+        return f"{degrees}°{minutes:.3f}'{hem}"
+    whole_minutes, seconds = minutes[0], minutes[1]
+    return f"{degrees}°{whole_minutes}'{seconds:.1f}''{hem}"
+
+
+def signed_coord(coord: str) -> str:
+    """
+    replaces the hemisphere letter at the end of the string with a sign
+
+    the last character is dropped; if it was S or W, a minus is prepended
+    """
+    value, hem = coord[:-1], coord[-1]
+    return '-' + value if hem in 'SW' else value
+
+
+def prepare_string(string: str) -> str:
+    """
+    standardizes the string
+
+    The string is casefolded, the words north, south, west and east are
+    shortened to their first letters and the words and typographic variants
+    of the degree, minute and second marks are replaced with °, ' and ''.
+    A letter 'o' is taken as a degree mark, unless the string contains '°'.
+
+    raises ValueError if 'O' or 'o' (other than in the recognized words)
+    and '°' are both present in the string
+    """
+    string = string.casefold()
+    for word, letter in (('north', 'n'), ('south', 's'),
+                         ('west', 'w'), ('east', 'e')):
+        string = string.replace(word, letter)
+    string = re.sub(
+        'seconds|sec|["“”‟]|[´`‘’‛][´`‘’‛]|[´`‘’‛] [´`‘’‛]', "''", string)
+    string = re.sub('minutes|min|[´`‘’‛]', "'", string)
+    # the check comes only after the words containing 'o' (north, south,
+    # seconds) have been replaced, otherwise "45° North" would be rejected
+    if 'o' in string and '°' in string:
+        raise ValueError("Encountered 'O' to indicate geographical direction which can mean either West (Spanish/French/Italian) or East (German); please change to E or W before conversion.")
+    string = re.sub('degrees|deg|o', '°', string)
+    return string
+
+
+def parse_coord(tokens: Tokens) -> Tuple[Coordinate, Tokens]:
+    """
+    parse a single coordinate and return the rest of the string
+
+    A single remaining token is taken as whole degrees. Otherwise
+    the separator after the first number decides: '.' or ',' starts
+    decimal degrees, '°' starts degrees followed by optional minutes.
+
+    raises a ValueError("parse error") if the beginning doesn't match a coordinate
+    """
+    # deal with the simple situations
+    if not tokens:
+        raise ValueError("parse error")
+    if len(tokens) == 1:
+        return float(tokens[0][0]), tokens[1:]
+
+    degrees, first_sep = tokens[0][0], tokens[0][1]
+    # startswith is safe for an empty separator (as in "45-30"),
+    # which then falls through to the parse error below
+    if first_sep.startswith(('.', ',')):
+        # parse floating point coordinate
+        return parse_float(tokens)
+    if first_sep.startswith('°'):
+        # parse degree, minutes coordinate
+        try:
+            minutes, tokens1 = parse_minutes(tokens[1:])
+        except ValueError:
+            # there is no minutes
+            return (float(degrees), tokens[1:])
+        return (degrees >= 0, abs(degrees), minutes), tokens1
+    raise ValueError("parse error")
+
+
+def parse_float(tokens: Tokens) -> Tuple[float, Tokens]:
+    """
+    parse a float and return the rest of the string
+
+    the first token holds the integer part and the second the decimal part;
+    the digits are used as they were written, so that "45.05" is not read
+    as 45.5 and "-0.5" keeps its sign
+
+    raise a ValueError("parse error") if the length is less than two
+    """
+    if len(tokens) < 2:
+        raise ValueError("parse error")
+    int_part = _digits(tokens[0][0])
+    dec_part = _digits(tokens[1][0])
+    return (float(int_part + '.' + dec_part), tokens[2:])
+
+
+def parse_minutes(tokens: Tokens) -> Tuple[Minute, Tokens]:
+    """
+    parse a coordinate starting with minutes and return the rest of the string
+
+    raises a ValueError("parse error"), if the parsing fails
+    """
+    # deal with the simple situations
+    if not tokens:
+        raise ValueError("parse error")
+    if len(tokens) == 1:
+        return float(tokens[0][0]), tokens[1:]
+
+    # startswith is safe for an empty separator, which is a parse error
+    first_sep = tokens[0][1]
+    if first_sep.startswith(('.', ',')):
+        # parse floating point minutes
+        return parse_float(tokens)
+    if first_sep.startswith("'"):
+        # parse minutes, seconds
+        minutes = int(tokens[0][0])
+        try:
+            seconds, tokens1 = parse_seconds(tokens[1:])
+        except ValueError:
+            # there is no seconds
+            return (float(minutes), tokens[1:])
+        return ((minutes, seconds), tokens1)
+    raise ValueError("parse error")
+
+
+def parse_seconds(tokens: Tokens) -> Tuple[float, Tokens]:
+    """
+    parse a coordinate starting with seconds and return the rest of the string
+
+    raises a ValueError("parse error"), if the parsing fails
+    """
+    # deal with the simple situations
+    if not tokens:
+        raise ValueError("parse error")
+    if len(tokens) == 1:
+        return float(tokens[0][0]), tokens[1:]
+
+    # startswith is safe for an empty separator, which is a parse error
+    first_sep = tokens[0][1]
+    if first_sep.startswith(('.', ',')):
+        # parse floating point seconds
+        return parse_float(tokens)
+    if first_sep.startswith("''"):
+        # parse seconds
+        return float(tokens[0][0]), tokens[1:]
+    raise ValueError("parse error")
+
+
+def hemisphere_sign(c: str, coord: Coordinate) -> Coordinate:
+    """
+    applies the hemisphere letter to the coordinate
+
+    n and e leave the coordinate unchanged, any other letter flips its sign
+    """
+    if c in 'ne':
+        return coord
+    if isinstance(coord, float):
+        return -coord
+    return (not coord[0], coord[1], coord[2])
+
+
+def cannot_parse_error(tokens: Tokens) -> ValueError:
+    """
+    makes a ValueError
+    "Cannot parse: tokens as str"
+    """
+    return ValueError("Cannot parse: " + ''.join(f"{n}{sep}" for n, sep in tokens))
+
+
+class _IntToken(int):
+    """
+    an int that remembers the digits it was read from
+
+    the text keeps what int() throws away: the leading zeros ("05")
+    and the sign of a negative zero ("-0")
+    """
+
+    def __new__(cls, text: str) -> "_IntToken":
+        token = super().__new__(cls, text)
+        token.text = text
+        return token
+
+
+def _digits(number: int) -> str:
+    """
+    returns the digits a token was read from
+
+    a plain int (without the text attribute) is converted with str
+    """
+    return getattr(number, 'text', str(number))
+
+
+# an optionally negative integer followed by everything up to the next number
+_TOKEN_RE = re.compile(r'(-?\d+)([^\d-]*)')
+
+
+def _tokenize(string: str) -> Tokens:
+    """splits the string into (number, following separator) pairs"""
+    return [(_IntToken(m.group(1)), m.group(2))
+            for m in _TOKEN_RE.finditer(string)]
+
+
+def parse_coordinates(string: str, lat_first: bool) -> Tuple[Coordinate, Coordinate]:
+    """
+    parses a string into coordinates with latitude first
+
+    lat_first indicates whether latitude is first in unmarked strings
+
+    raises a ValueError, if the string contains letters other than
+    the hemispheres, if the hemispheres don't determine the order of
+    the coordinates or if the coordinates cannot be parsed
+
+    if the whole string is not consumed, raises a ValueError with the rest
+    """
+    # sanitize the string
+    string = prepare_string(string)
+    # extract the quadrant information
+    letters = [c for c in string if c.isalpha()]
+    quadrant = [c for c in letters if c in 'nsew']
+    if len(letters) > len(quadrant):
+        # there are disallowed letters in coordinates
+        unknown = ', '.join(sorted(set(letters) - set(quadrant)))
+        raise ValueError(f"Letters {unknown} cannot be recognized as hemispheres")
+    # determine whether the coordinates need to be exchanged
+    if not quadrant:
+        swapped = not lat_first
+    elif len(quadrant) == 2:
+        if quadrant[0] in 'ns' and quadrant[1] in 'we':
+            swapped = False
+        elif quadrant[0] in 'we' and quadrant[1] in 'ns':
+            swapped = True
+        else:
+            # both letters belong to the same axis (like n, s)
+            raise ValueError(f"Cannot recognize the order of coordinates: {string}")
+    else:
+        raise ValueError(f"Cannot recognize the order of coordinates: {string}")
+
+    def orient(p: Tuple[Coordinate, Coordinate]) -> Tuple[Coordinate, Coordinate]:
+        # negates and exchanges the coordinates based on the quadrant
+        if quadrant:
+            p = (hemisphere_sign(quadrant[0], p[0]),
+                 hemisphere_sign(quadrant[1], p[1]))
+        return (p[1], p[0]) if swapped else p
+
+    # split the string into tokens
+    tokens = _tokenize(string)
+    # parse coordinates one after the other
+    try:
+        coord0, tokens1 = parse_coord(tokens)
+    except ValueError as ex:
+        raise cannot_parse_error(tokens) from ex
+    if not tokens1 and len(tokens) == 2:  # probably the degrees, degrees situation
+        return orient((float(tokens[0][0]), float(tokens[1][0])))
+    try:
+        coord1, rest = parse_coord(tokens1)
+    except ValueError as ex:
+        raise cannot_parse_error(tokens1) from ex
+    if rest:
+        # incomplete parse: error
+        raise cannot_parse_error(rest)
+    return orient((coord0, coord1))
+
+def validate_coord(coord: Coordinate, direction: str) -> str:
+    """
+    direction is "latitude" or "longitude"
+
+    Returns a message if degrees are invalid (above 180 in absolute value)
+    or if minutes or seconds are invalid (not in [0, 60))
+    Otherwise returns an empty string
+    """
+    if isinstance(coord, float):
+        # decimal degrees carry the hemisphere in their sign,
+        # so only the magnitude is checked
+        if not abs(coord) <= 180:
+            return f"Invalid degrees in {direction}"
+        return ""
+    _, degrees, minutes = coord
+    if not 0 <= degrees <= 180:
+        return f"Invalid degrees in {direction}"
+    if isinstance(minutes, float):
+        if not 0 <= minutes < 60:
+            return f"Invalid minutes in {direction}"
+        return ""
+    whole_minutes, seconds = minutes
+    if not 0 <= whole_minutes < 60:
+        return f"Invalid minutes in {direction}"
+    if not 0 <= seconds < 60:
+        return f"Invalid seconds in {direction}"
+    return ""
+
+
+def process_simpl(input: Iterator[str]) -> Iterator[List[str]]:
+    """
+    converts lines with pairs of coordinates into rows of the output table
+
+    The first line is taken as the heading, if it contains both 'lat' and
+    'lon' (in any case); their order tells which coordinate comes first.
+    Otherwise latitude is expected first and the first line is data.
+
+    Yields the output heading and then a row for each non-blank line.
+    The row of a line that cannot be parsed or validated contains only
+    the original text and the reason in the last column ("Remark").
+    Nothing is yielded, if the input contains no line after the heading.
+    """
+    from itertools import chain
+
+    lines = iter(input)
+    # by default latitude comes first
+    lat_first = True
+    try:
+        # read the first line
+        line = next(lines)
+        # try to find 'lat' and 'lon' in the first line
+        heading = line.casefold()
+        lat_ind = heading.find('lat')
+        lon_ind = heading.find('lon')
+        if lat_ind >= 0 and lon_ind >= 0:
+            # the first line is the heading, the data start on the next one
+            lat_first = lat_ind <= lon_ind
+            line = next(lines)
+    except StopIteration:
+        return
+    # yield the output heading
+    both = "latlon" if lat_first else "lonlat"
+    yield ["original_lat", "original_lon", f"original_{both}", "lat_corr", "lon_corr", "lat_dec", "lon_dec", "latlon_dec", "lat_sx", "lon_sx", "latlon_sx", "Remark"]
+    # the line that has been already read goes first
+    for line in chain([line], lines):
+        line = line.strip()
+        if not line:
+            continue
+        # format the part of the output with the original information
+        part1, _, part2 = line.partition('\t')
+        if not part1 or not part2 or part1.isspace() or part2.isspace():
+            original = ["", "", line]
+        elif lat_first:
+            original = [part1, part2, ""]
+        else:
+            original = [part2, part1, ""]
+        # try to parse the line, if it fails, output just the original
+        try:
+            lat, lon = parse_coordinates(line, lat_first)
+        except ValueError as ex:
+            yield original + [""] * 8 + [str(ex)]
+            continue
+        # validate bounds on degrees, minutes and seconds
+        problems = (validate_coord(lat, "latitude"),
+                    validate_coord(lon, "longitude"))
+        # the separator keeps two messages from running into each other
+        remark = "; ".join(problem for problem in problems if problem)
+        if remark:
+            yield original + [""] * 8 + [remark]
+            continue
+        # compose the output
+        lat_corr = str_coord(lat, True)
+        lon_corr = str_coord(lon, False)
+        lat_dec = str_coord(dec_coord(lat), True)
+        lon_dec = str_coord(dec_coord(lon), False)
+        lat_sx = str_coord(sx_coord(lat), True)
+        lon_sx = str_coord(sx_coord(lon), False)
+        yield original + [lat_corr, lon_corr, signed_coord(lat_dec), signed_coord(lon_dec), f"{lat_dec} {lon_dec}", lat_sx, lon_sx, f"{lat_sx} {lon_sx}", ""]
+
+def launch_gui() -> None:
+    """
+    builds the window of the converter and runs its main loop
+
+    The coordinates are taken from the input file, if its name is given,
+    otherwise from the text widget on the left. The result is written
+    to the output file, if its name is given, otherwise the decimal
+    coordinates and the remarks are shown in the text widget on the right.
+    """
+    # the "ansi" codec exists only on Windows,
+    # elsewhere opening a file with it raises a LookupError
+    file_encoding = "ansi" if os.name == "nt" else "utf-8"
+
+    # initialization
+    root = tk.Tk()
+    root.title("LatLonConverter")
+    if os.name == "nt":
+        root.wm_iconbitmap(os.path.join('data', 'LatLonIcon.ico'))
+    mainframe = ttk.Frame(root, padding=5)
+    root.rowconfigure(1, weight=1)
+    root.columnconfigure(0, weight=1)
+    mainframe.rowconfigure(4, weight=1)
+    mainframe.columnconfigure(2, weight=1)
+
+    style = ttk.Style()
+    style.configure("ConvertButton.TButton", background="blue")
+
+    # banner frame
+    banner_frame = ttk.Frame(root)
+    banner_img = tk.PhotoImage(file=os.path.join(
+        "data", "iTaxoTools Digital linneaeus MICROLOGO.png"))
+    banner_image = ttk.Label(banner_frame, image=banner_img)
+    banner_image.grid(row=0, column=0, rowspan=2, sticky='nsw')
+    program_name = ttk.Label(
+        banner_frame, text="LatLonConverter", font=tkfont.Font(size=20))
+    program_name.grid(row=1, column=1, sticky='sw')
+    program_description = ttk.Label(
+        banner_frame, text="A batch converter of geographical coordinates")
+    program_description.grid(row=1, column=2, sticky='sw', ipady=4, ipadx=15)
+    banner_frame.grid(column=0, row=0, sticky='nsw')
+
+    # create labels
+    infile_lbl = ttk.Label(mainframe, text="Input file")
+    outfile_lbl = ttk.Label(mainframe, text="Output file")
+
+    # create entries
+    infile_var = tk.StringVar()
+    infile_entr = ttk.Entry(mainframe, textvariable=infile_var)
+    outfile_var = tk.StringVar()
+    outfile_entr = ttk.Entry(mainframe, textvariable=outfile_var)
+
+    # create texts
+    input_frame = ttk.Frame(mainframe)
+    input_frame.rowconfigure(1, weight=1)
+    input_frame.columnconfigure(0, weight=1)
+    input_text = tk.Text(input_frame, width=50, height=15, undo=True)
+    input_lbl = ttk.Label(input_frame, text="Paste coordinates here for fast conversion into decimal format\n(one pair of coordinates per line, in any format)")
+    input_xscroll = ttk.Scrollbar(
+        input_frame, orient=tk.HORIZONTAL, command=input_text.xview)
+    input_yscroll = ttk.Scrollbar(
+        input_frame, orient=tk.VERTICAL, command=input_text.yview)
+    input_text.configure(xscrollcommand=input_xscroll.set,
+                         yscrollcommand=input_yscroll.set)
+    input_lbl.grid(row=0, column=0, sticky='w')
+    input_text.grid(row=1, column=0, sticky='nsew')
+    input_xscroll.grid(row=2, column=0, sticky='nsew')
+    input_yscroll.grid(row=1, column=1, sticky='nsew')
+
+    output_frame = ttk.Frame(mainframe)
+    output_frame.rowconfigure(1, weight=1)
+    output_frame.columnconfigure(0, weight=1)
+    output_text = tk.Text(output_frame, width=50, height=15, wrap='none')
+    output_lbl = ttk.Label(output_frame, text="If the data have been pasted into the window on the left,\nthe converted output will be show here.")
+    output_xscroll = ttk.Scrollbar(
+        output_frame, orient=tk.HORIZONTAL, command=output_text.xview)
+    output_yscroll = ttk.Scrollbar(
+        output_frame, orient=tk.VERTICAL, command=output_text.yview)
+    output_text.configure(xscrollcommand=output_xscroll.set,
+                          yscrollcommand=output_yscroll.set)
+    output_text.configure(state='disabled')
+    # make sure the widget gets focus when clicked
+    # on, to enable highlighting and copying to the
+    # clipboard.
+    output_text.bind("<1>", lambda _: output_text.focus_set())
+    output_lbl.grid(row=0, column=0, sticky='w')
+    output_text.grid(row=1, column=0, sticky='nsew')
+    output_xscroll.grid(row=2, column=0, sticky='nsew')
+    output_yscroll.grid(row=1, column=1, sticky='nsew')
+
+    # internal functions
+    def input_lines() -> Iterator[str]:
+        """
+        returns an iterator over the input lines
+
+        if the input file name is given, the line comes from it,
+        otherwise from the input text widget
+        """
+        filename = infile_var.get()
+        if filename and not filename.isspace():
+            with open(filename, encoding=file_encoding, errors='replace') as file:
+                yield from file
+        else:
+            text = input_text.get('1.0', 'end')
+            yield from text.splitlines()
+
+    def write_output(lines: Iterator[List[str]]) -> None:
+        """
+        writes the output
+
+        if the output file name is given, the output is written to it,
+        otherwise to the output text widget
+        """
+        filename = outfile_var.get()
+        output_text.configure(state='normal')
+        try:
+            output_text.delete('1.0', 'end')
+            if filename and not filename.isspace():
+                with open(filename, encoding=file_encoding, mode='w') as file:
+                    for line in lines:
+                        print("\t".join(line), file=file)
+            else:
+                for line in lines:
+                    output_text.insert('end', f"{line[5]}\t{line[6]}\t{line[-1]}")
+                    output_text.insert('end', '\n')
+        finally:
+            # the widget must not stay editable, even if the writing fails
+            output_text.configure(state='disabled')
+
+    def browse_path(ask, path_var: tk.StringVar) -> None:
+        """
+        asks the user for a file name with the dialog ask
+        and stores it in path_var, unless the dialog is cancelled
+        """
+        newpath: Optional[str] = ask()
+        if newpath:
+            try:
+                newpath = os.path.relpath(newpath)
+            except ValueError:
+                # no relative path exists (a different drive on Windows)
+                newpath = os.path.abspath(newpath)
+            path_var.set(newpath)
+
+    def browse_infile() -> None:
+        browse_path(tkfiledialog.askopenfilename, infile_var)
+
+    def browse_outfile() -> None:
+        browse_path(tkfiledialog.asksaveasfilename, outfile_var)
+
+    def process() -> None:
+        """
+        command for the Process button
+        """
+        try:
+            # catch all warnings
+            with warnings.catch_warnings(record=True) as warns:
+                write_output(process_simpl(input_lines()))
+                # display the warnings generated during the conversion
+                for w in warns:
+                    tkinter.messagebox.showwarning("Warning", str(w.message))
+            # notify the user that the conversion is finished
+            tkinter.messagebox.showinfo(
+                "Done.", "The processing has been completed")
+        # show the ValueErrors and the errors of opening, reading
+        # and writing the files (FileNotFoundError is one of them)
+        except (ValueError, OSError) as ex:
+            tkinter.messagebox.showerror("Error", str(ex))
+
+    def load() -> None:
+        """
+        loads the text from the input file into the input text widget
+        """
+        filename = infile_var.get()
+        input_text.delete('1.0', 'end')
+        if filename and not filename.isspace():
+            try:
+                with open(filename, encoding="utf8", errors='replace') as file:
+                    for line in file:
+                        input_text.insert('end', line)
+            except OSError as ex:
+                tkinter.messagebox.showerror("Error", str(ex))
+
+    # create buttons
+    infile_btn = ttk.Button(mainframe, text="Browse", command=browse_infile)
+    outfile_btn = ttk.Button(mainframe, text="Browse", command=browse_outfile)
+    load_btn = ttk.Button(mainframe, text="Load", command=load)
+    process_btn = ttk.Button(mainframe, text="Convert", command=process, style="ConvertButton.TButton")
+
+    # display the widgets
+    infile_lbl.grid(row=0, column=0, sticky='w')
+    infile_entr.grid(row=1, column=0, sticky='we')
+    infile_btn.grid(row=1, column=1, sticky='w')
+
+    outfile_lbl.grid(row=0, column=3, sticky='w')
+    outfile_entr.grid(row=1, column=3, sticky='we')
+    outfile_btn.grid(row=1, column=4, sticky='w')
+
+    load_btn.grid(row=2, column=0)
+    process_btn.grid(row=2, column=2)
+
+    ttk.Separator(mainframe, orient='horizontal').grid(row=3, column=0, columnspan=5, sticky='nsew', pady=20)
+
+    input_frame.grid(row=4, column=0, columnspan=2)
+    output_frame.grid(row=4, column=3, columnspan=2)
+
+    ttk.Separator(root, orient='horizontal').grid(row=1, column=0, sticky='nsew')
+
+    mainframe.grid(row=2, column=0, sticky='nsew')
+
+    root.mainloop()
+
+
+# without --cmd the program runs as a GUI,
+# with it as a filter from the standard input to the standard output
+if '--cmd' not in sys.argv:
+    launch_gui()
+else:
+    for line in process_simpl(sys.stdin):
+        print('\t'.join(line))