view latlon_conv.py @ 0:ef146e4077e6 draft default tip

planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
author itaxotools
date Sun, 29 Jan 2023 16:44:56 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python3
import re
import os
import math
import sys
from typing import List, Tuple, Union, Iterator, Optional
import tkinter as tk
from tkinter import ttk
import tkinter.filedialog as tkfiledialog
import tkinter.messagebox
import tkinter.font as tkfont
import warnings

# the parsers' input type
Tokens = List[Tuple[int, str]]
# types of minutes: either a float or int with seconds
Minute = Union[float, Tuple[int, float]]
# type of coordinates: either a float or sign, degree and minutes
Coordinate = Union[float, Tuple[bool, int, Minute]]


def dec_minute(minute: Minute) -> float:
    """
    converts minutes into decimal minutes

    a float is returned unchanged;
    a (minutes, seconds) pair is folded into one number of minutes
    """
    if not isinstance(minute, float):
        whole, seconds = minute[0], minute[1]
        # 60 seconds make a minute
        return whole + seconds / 60
    return minute


def dec_coord(coord: Coordinate) -> float:
    """
    converts a coordinate into signed decimal degrees

    a float is returned unchanged;
    a (sign, degrees, minutes) triple is folded into one number,
    negative when the sign flag is False
    """
    if isinstance(coord, float):
        return coord
    positive, degrees, minutes = coord
    # 60 minutes make a degree
    magnitude = degrees + dec_minute(minutes) / 60
    return magnitude if positive else -magnitude


def sx_coord(coord: Coordinate) -> Tuple[bool, int, Tuple[int, float]]:
    """
    converts a coordinate into sexagesimal form

    returns (sign, degrees, (minutes, seconds)), where sign is True for
    non-negative coordinates; a coordinate that already carries seconds
    is returned with its parts unchanged
    """
    if isinstance(coord, float):
        # split decimal degrees into whole degrees and decimal minutes
        positive = coord >= 0
        magnitude = abs(coord)
        whole_degrees = math.floor(magnitude)
        coord = (positive, whole_degrees, (magnitude - whole_degrees) * 60)
    positive, whole_degrees, minutes = coord
    if not isinstance(minutes, float):
        # the minutes are already a (minutes, seconds) pair
        return (positive, whole_degrees, minutes)
    # split decimal minutes into whole minutes and seconds
    whole_minutes = math.floor(minutes)
    seconds = (minutes - whole_minutes) * 60
    return (positive, whole_degrees, (whole_minutes, seconds))


def str_coord(coord: Coordinate, lat: bool) -> str:
    """
    formats a coordinate followed by its hemisphere letter

    lat selects the letters: S/N for a latitude, W/E for a longitude

    the output has the form of the input:
    decimal degrees (5 decimals), degrees and decimal minutes (3 decimals)
    or degrees, minutes and seconds (1 decimal)

    when rounding to the displayed precision produces 60 seconds or
    60 minutes, the overflow is carried into the next unit, so that
    10°11'59.99'' is written as 10°12'0.0'' and not as 10°11'60.0''
    """
    hems = ['S', 'N'] if lat else ['W', 'E']
    if isinstance(coord, float):
        # the bool indexes the list: False -> S/W, True -> N/E
        return f"{abs(coord):.5f}{hems[coord >= 0]}"

    sign, degrees, minutes = coord
    if isinstance(minutes, float):
        minutes_text = f"{minutes:.3f}"
        if minutes_text == "60.000":
            # the formatted text is compared, because that is what is shown
            degrees += 1
            minutes_text = "0.000"
        return f"{degrees}°{minutes_text}'{hems[sign]}"

    whole_minutes, seconds = minutes
    seconds_text = f"{seconds:.1f}"
    if seconds_text == "60.0":
        whole_minutes += 1
        seconds_text = "0.0"
        if whole_minutes == 60:
            degrees += 1
            whole_minutes = 0
    return f"{degrees}°{whole_minutes}'{seconds_text}''{hems[sign]}"


def signed_coord(coord: str) -> str:
    """
    replaces the trailing hemisphere letter with a sign

    the last character is dropped; a minus sign is put in front
    when that character is S or W
    """
    body, hemisphere = coord[:-1], coord[-1]
    prefix = '-' if hemisphere in 'SW' else ''
    return prefix + body


def prepare_string(string: str) -> str:
    """
    standardizes the string

    the string is case-folded, spelled-out hemispheres become n/s/w/e,
    second markers become '', minute markers become ' and
    degree markers (including a lone letter o) become °

    raises ValueError if both 'O' or 'o' and '°' are present in the string,
    not counting the letter o inside the words replaced above
    """
    string = string.casefold()
    # replace the spelled-out words first: "north", "south" and "seconds"
    # contain an 'o' that must not be mistaken for a direction or a degree sign
    for word, letter in (('north', 'n'), ('south', 's'), ('west', 'w'), ('east', 'e')):
        string = string.replace(word, letter)
    string = re.sub(
        'seconds|sec|["“”‟]|[´`‘’‛][´`‘’‛]|[´`‘’‛] [´`‘’‛]', "''", string)
    string = re.sub('minutes|min|[´`‘’‛]', "'", string)
    # an 'o' left at this point stands for degrees unless a real degree
    # sign is present as well; then it can only be a direction
    if 'o' in string and '°' in string:
        raise ValueError("Encountered 'O' to indicate geographical direction which can mean either West (Spanish/French/Italian) or East (German); please change to E or W before conversion.")
    string = re.sub('degrees|deg|o', '°', string)
    return string


def parse_coord(tokens: Tokens) -> Tuple[Coordinate, Tokens]:
    """
    parse a single coordinate and return the rest of the tokens

    a lone token is taken as whole degrees;
    otherwise the separator after the first number decides:
    '.' or ',' starts a decimal number, '°' starts degrees that may be
    followed by minutes

    raises a ValueError("parse error") if the beginning doesn't match a coordinate,
    which includes a first number with no separator after it
    """
    # deal with the simple situations
    if not tokens:
        raise ValueError("parse error")
    if len(tokens) == 1:
        return float(tokens[0][0]), tokens[1:]

    number, separator = tokens[0][0], tokens[0][1]
    # startswith is used instead of separator[0], because the separator
    # may be empty (e.g. in "45-12") and indexing it would raise IndexError
    if separator.startswith(('.', ',')):
        # parse floating point coordinate
        return parse_float(tokens)
    if separator.startswith('°'):
        # parse degree, minutes coordinate
        try:
            minutes, rest = parse_minutes(tokens[1:])
        except ValueError:
            # there is no minutes
            return float(number), tokens[1:]
        return (number >= 0, abs(number), minutes), rest
    raise ValueError("parse error")


def parse_float(tokens: Tokens) -> Tuple[float, Tokens]:
    """
    parse a float and return the rest of the tokens

    the numbers of the first two tokens are the integral and the
    fractional digits; they are joined as text, so the result depends
    on how these numbers print with str()

    raise a ValueError("parse error") if the length is less than two
    """
    if len(tokens) >= 2:
        whole, fraction = tokens[0][0], tokens[1][0]
        return (float(f"{whole!s}.{fraction!s}"), tokens[2:])
    raise ValueError("parse error")


def parse_minutes(tokens: Tokens) -> Tuple[Minute, Tokens]:
    """
    parse a coordinate starting with minutes and return the rest of the tokens

    a lone token is taken as whole minutes;
    otherwise the separator after the first number decides:
    '.' or ',' starts decimal minutes, "'" starts minutes that may be
    followed by seconds

    raises a ValueError("parse error"), if the parsing fails,
    which includes a first number with no separator after it
    """
    # deal with the simple situations
    if not tokens:
        raise ValueError("parse error")
    if len(tokens) == 1:
        return float(tokens[0][0]), tokens[1:]

    minutes, separator = tokens[0][0], tokens[0][1]
    # startswith is used instead of separator[0], because the separator
    # may be empty and indexing it would raise IndexError
    if separator.startswith(('.', ',')):
        # parse floating point minutes
        return parse_float(tokens)
    if separator.startswith("'"):
        # parse minutes, seconds
        try:
            seconds, rest = parse_seconds(tokens[1:])
        except ValueError:
            # there is no seconds
            return float(minutes), tokens[1:]
        return (minutes, seconds), rest
    raise ValueError("parse error")


def parse_seconds(tokens: Tokens) -> Tuple[float, Tokens]:
    """
    parse a coordinate starting with seconds and return the rest of the tokens

    a lone token is taken as whole seconds;
    otherwise the separator after the first number decides:
    '.' or ',' starts decimal seconds, "''" ends whole seconds

    the seconds are always returned as a float

    raises a ValueError("parse error"), if the parsing fails,
    which includes a first number with no separator after it
    """
    # deal with the simple situations
    if not tokens:
        raise ValueError("parse error")
    if len(tokens) == 1:
        return float(tokens[0][0]), tokens[1:]

    seconds, separator = tokens[0][0], tokens[0][1]
    # startswith is used instead of separator[0], because the separator
    # may be empty and indexing it would raise IndexError
    if separator.startswith(('.', ',')):
        # parse floating point seconds
        return parse_float(tokens)
    if separator.startswith("''"):
        # parse seconds
        return float(seconds), tokens[1:]
    raise ValueError("parse error")


def hemisphere_sign(c: str, coord: Coordinate) -> Coordinate:
    """
    applies the hemisphere letter to the coordinate

    the coordinate is returned unchanged for 'n' and 'e',
    otherwise its sign is flipped
    """
    if c in 'ne':
        return coord
    if isinstance(coord, float):
        return -coord
    positive, degrees, minutes = coord
    return (not positive, degrees, minutes)

def cannot_parse_error(tokens: Tokens) -> ValueError:
    """
    makes (does not raise) a ValueError with the message
    "Cannot parse: " followed by the tokens glued back together,
    every number directly followed by its separator
    """
    pieces = [f"{number!s}{separator}" for number, separator in tokens]
    return ValueError("Cannot parse: " + ''.join(pieces))

class _DigitRun(int):
    """
    an int that remembers the text it was read from

    parse_float rebuilds a decimal number by joining str() of the
    integral and of the fractional token; with plain ints the text
    "05" prints as "5", turning 45.05 into 45.5, and "-0" prints as "0",
    turning -0.5 into 0.5; printing the original text avoids both
    """

    def __new__(cls, text: str) -> "_DigitRun":
        self = super().__new__(cls, text)
        self.text = text
        return self

    def __str__(self) -> str:
        return self.text


def _plain_numbers(value):
    """
    returns the value with every _DigitRun replaced by a built-in int
    (tuples are processed recursively, anything else is returned as is)
    """
    if isinstance(value, tuple):
        return tuple(_plain_numbers(item) for item in value)
    if isinstance(value, _DigitRun):
        return int(value)
    return value


def parse_coordinates(string: str, lat_first: bool) -> Tuple[Coordinate, Coordinate]:
    """
    parses a string into coordinates with latitude first

    lat_first indicates whether latitude is first in unmarked strings;
    when the string contains two hemisphere letters, they decide both
    the order and the signs, the first letter belonging to the first
    coordinate

    raises a ValueError if
    * the string contains letters other than hemisphere letters,
    * the hemisphere letters do not name one latitude and one longitude,
    * a coordinate cannot be parsed,
    * the whole string is not consumed (the message shows the rest)
    """
    # sanitize the string
    string = prepare_string(string)
    # extract the quadrant information
    letters = [c for c in string if c.isalpha()]
    quadrant = [c for c in letters if c in 'nsew']
    if len(letters) > len(quadrant):
        # there are disallowed letters in coordinates
        unknown = sorted(set(letters) - set(quadrant))
        raise ValueError(f"Letters {unknown} cannot be recognized as hemispheres")
    # decide whether the parsed pair has to be exchanged
    if not quadrant:
        swapped = not lat_first
    elif len(quadrant) == 2:
        if quadrant[0] in 'ns' and quadrant[1] in 'we':
            swapped = False
        elif quadrant[0] in 'we' and quadrant[1] in 'ns':
            swapped = True
        else:
            # two latitudes or two longitudes
            raise ValueError(f"Cannot recognize the order of coordinates: {string}")
    else:
        raise ValueError(f"Cannot recognize the order of coordinates: {string}")

    def orient(p: Tuple[Coordinate, Coordinate]) -> Tuple[Coordinate, Coordinate]:
        # negates and exchanges the coordinates based on the quadrant
        if quadrant:
            p = (hemisphere_sign(quadrant[0], p[0]), hemisphere_sign(quadrant[1], p[1]))
        return (p[1], p[0]) if swapped else p

    # split the string into tokens: a number and the text up to the next number
    tokens = [(_DigitRun(m.group(1)), m.group(2))
              for m in re.finditer(r'(-?\d+)([^\d-]*)', string)]
    # parse coordinates one after the other
    try:
        coord0, tokens1 = parse_coord(tokens)
    except ValueError as ex:
        raise cannot_parse_error(tokens) from ex
    if not tokens1 and len(tokens) == 2:  # probably the degrees, degrees situation
        return orient((float(tokens[0][0]), float(tokens[1][0])))
    try:
        coord1, rest = parse_coord(tokens1)
    except ValueError as ex:
        raise cannot_parse_error(tokens1) from ex
    if rest:
        # incomplete parse: error
        raise cannot_parse_error(rest)
    # the callers get ordinary ints, not the text-carrying ones
    return orient((_plain_numbers(coord0), _plain_numbers(coord1)))

def validate_coord(coord: Coordinate, direction: str) -> str:
    """
    direction is "latitude" or "longitude"

    Returns a message if
    * the degrees are out of range: the magnitude may not exceed 90
      for "latitude" and 180 for any other direction,
    * minutes or seconds are invalid (not in [0, 60))
    Otherwise returns an empty string

    The sign is not restricted: decimal coordinates of the southern
    and western hemispheres are negative and valid.
    """
    limit = 90 if direction == "latitude" else 180
    if isinstance(coord, float):
        # written as "not <=" so that NaN is reported as invalid
        if not abs(coord) <= limit:
            return f"Invalid degrees in {direction}"
        return ""

    # the sign is stored separately, the degrees are a magnitude
    _, degrees, minutes = coord
    if not 0 <= degrees <= limit:
        return f"Invalid degrees in {direction}"
    if isinstance(minutes, float):
        if not 0 <= minutes < 60:
            return f"Invalid minutes in {direction}"
        beyond_whole_degrees = minutes > 0
    else:
        whole_minutes, seconds = minutes
        if not 0 <= whole_minutes < 60:
            return f"Invalid minutes in {direction}"
        if not 0 <= seconds < 60:
            return f"Invalid seconds in {direction}"
        beyond_whole_degrees = whole_minutes > 0 or seconds > 0
    if degrees == limit and beyond_whole_degrees:
        # e.g. 90°30' is beyond the pole
        return f"Invalid degrees in {direction}"
    return ""


def process_simpl(input: Iterator[str]) -> Iterator[List[str]]:
    """
    converts lines with coordinates into rows of the output table

    If the first line contains both 'lat' and 'lon' (in any case), it is
    a heading: it is not converted and the order of the two words decides
    whether latitude comes first. Otherwise latitude is assumed to come
    first and the first line is converted like the others.

    The first row yielded is the heading of the output; nothing is
    yielded if there is no line to convert. Blank lines are skipped.
    Every other line gives one row of 12 columns:
    * the original text (split at the first tab, if it has two parts),
    * the coordinates in their original form, as signed decimal degrees,
      as decimal degrees with hemispheres and in sexagesimal form,
    * a remark, which is empty unless the line could not be parsed or
      validated; in that case the converted columns are empty
    """
    lines = iter(input)
    # by default latitude comes first
    lat_first = True
    # read the first line
    first = next(lines, None)
    if first is None:
        return
    # try to find 'lat' and 'lon' in the first line
    heading = first.casefold()
    lat_ind = heading.find('lat')
    lon_ind = heading.find('lon')
    if lat_ind >= 0 and lon_ind >= 0:
        # first line is the heading
        lat_first = lat_ind <= lon_ind
        first = next(lines, None)
        if first is None:
            return
    # yield the output heading
    both = "latlon" if lat_first else "lonlat"
    yield ["original_lat", "original_lon", f"original_{both}", "lat_corr", "lon_corr", "lat_dec", "lon_dec", "latlon_dec", "lat_sx", "lon_sx", "latlon_sx", "Remark"]

    def pending() -> Iterator[str]:
        # the line that has already been read, then the unread ones
        yield first
        yield from lines

    for raw in pending():
        line = raw.strip()
        if not line:
            continue
        # format the part of the output with the original information
        part1, _, part2 = line.partition('\t')
        if not part1 or not part2 or part1.isspace() or part2.isspace():
            original = ["", "", line]
        elif lat_first:
            original = [part1, part2, ""]
        else:
            original = [part2, part1, ""]
        # try to parse the line, if it fails, output just the original
        try:
            lat, lon = parse_coordinates(line, lat_first)
        except ValueError as ex:
            yield original + [""] * 8 + [str(ex)]
            continue
        # validate the bounds of degrees, minutes and seconds
        remarks = [remark
                   for remark in (validate_coord(lat, "latitude"),
                                  validate_coord(lon, "longitude"))
                   if remark]
        if remarks:
            yield original + [""] * 8 + ["; ".join(remarks)]
            continue
        # compose the output
        lat_corr = str_coord(lat, True)
        lon_corr = str_coord(lon, False)
        lat_dec = str_coord(dec_coord(lat), True)
        lon_dec = str_coord(dec_coord(lon), False)
        lat_sx = str_coord(sx_coord(lat), True)
        lon_sx = str_coord(sx_coord(lon), False)
        yield original + [lat_corr, lon_corr, signed_coord(lat_dec), signed_coord(lon_dec), f"{lat_dec} {lon_dec}", lat_sx, lon_sx, f"{lat_sx} {lon_sx}", ""]


def launch_gui() -> None:
    """
    builds the main window and runs the Tk event loop until it is closed

    The window has a banner, entries for an input and an output file and
    two text widgets. "Convert" reads the input file, or the left text
    widget if no file is given, and writes the full table to the output
    file, or the decimal coordinates and remarks to the right text
    widget if no file is given. "Load" shows the input file in the left
    text widget.

    The icon and the banner image are looked up in the directory "data"
    relative to the current working directory.
    """
    # imported here, because only the GUI needs them
    import codecs
    import locale

    # The files have always been opened with the "ansi" codec, which
    # Python provides only on Windows; elsewhere the lookup fails, so the
    # locale's preferred encoding is used there.
    try:
        codecs.lookup("ansi")
        file_encoding = "ansi"
    except LookupError:
        file_encoding = locale.getpreferredencoding(False)

    # initialization
    root = tk.Tk()
    root.title("LatLonConverter")
    if os.name == "nt":
        root.wm_iconbitmap(os.path.join('data', 'LatLonIcon.ico'))
    mainframe = ttk.Frame(root, padding=5)
    root.rowconfigure(1, weight=1)
    root.columnconfigure(0, weight=1)
    mainframe.rowconfigure(4, weight=1)
    mainframe.columnconfigure(2, weight=1)

    style = ttk.Style()
    style.configure("ConvertButton.TButton", background="blue")

    # banner frame
    banner_frame = ttk.Frame(root)
    # the image is kept in a local variable for as long as the window lives
    banner_img = tk.PhotoImage(file=os.path.join(
        "data", "iTaxoTools Digital linneaeus MICROLOGO.png"))
    banner_image = ttk.Label(banner_frame, image=banner_img)
    banner_image.grid(row=0, column=0, rowspan=2, sticky='nsw')
    program_name = ttk.Label(
        banner_frame, text="LatLonConverter", font=tkfont.Font(size=20))
    program_name.grid(row=1, column=1, sticky='sw')
    program_description = ttk.Label(
        banner_frame, text="A batch converter of geographical coordinates")
    program_description.grid(row=1, column=2, sticky='sw', ipady=4, ipadx=15)
    banner_frame.grid(column=0, row=0, sticky='nsw')

    # create labels
    infile_lbl = ttk.Label(mainframe, text="Input file")
    outfile_lbl = ttk.Label(mainframe, text="Output file")

    # create entries
    infile_var = tk.StringVar()
    infile_entr = ttk.Entry(mainframe, textvariable=infile_var)
    outfile_var = tk.StringVar()
    outfile_entr = ttk.Entry(mainframe, textvariable=outfile_var)

    # create texts
    input_frame = ttk.Frame(mainframe)
    input_frame.rowconfigure(1, weight=1)
    input_frame.columnconfigure(0, weight=1)
    input_text = tk.Text(input_frame, width=50, height=15, undo=True)
    input_lbl = ttk.Label(input_frame, text="Paste coordinates here for fast conversion into decimal format\n(one pair of coordinates per line, in any format)")
    input_xscroll = ttk.Scrollbar(
        input_frame, orient=tk.HORIZONTAL, command=input_text.xview)
    input_yscroll = ttk.Scrollbar(
        input_frame, orient=tk.VERTICAL, command=input_text.yview)
    input_text.configure(xscrollcommand=input_xscroll.set,
                         yscrollcommand=input_yscroll.set)
    input_lbl.grid(row=0, column=0, sticky='w')
    input_text.grid(row=1, column=0, sticky='nsew')
    input_xscroll.grid(row=2, column=0, sticky='nsew')
    input_yscroll.grid(row=1, column=1, sticky='nsew')

    output_frame = ttk.Frame(mainframe)
    output_frame.rowconfigure(1, weight=1)
    output_frame.columnconfigure(0, weight=1)
    output_text = tk.Text(output_frame, width=50, height=15, wrap='none')
    output_lbl = ttk.Label(output_frame, text="If the data have been pasted into the window on the left,\nthe converted output will be show here.")
    output_xscroll = ttk.Scrollbar(
        output_frame, orient=tk.HORIZONTAL, command=output_text.xview)
    output_yscroll = ttk.Scrollbar(
        output_frame, orient=tk.VERTICAL, command=output_text.yview)
    output_text.configure(xscrollcommand=output_xscroll.set,
                          yscrollcommand=output_yscroll.set)
    # the output is read-only, except while write_output fills it
    output_text.configure(state='disabled')
    # make sure the widget gets focus when clicked
    # on, to enable highlighting and copying to the
    # clipboard.
    output_text.bind("<1>", lambda _: output_text.focus_set())
    output_lbl.grid(row=0, column=0, sticky='w')
    output_text.grid(row=1, column=0, sticky='nsew')
    output_xscroll.grid(row=2, column=0, sticky='nsew')
    output_yscroll.grid(row=1, column=1, sticky='nsew')

    # internal functions
    def input_lines() -> Iterator[str]:
        """
        returns an iterator over the input lines

        if the input file name is given, the lines come from it,
        otherwise from the input text widget
        """
        filename = infile_var.get()
        if filename and not filename.isspace():
            with open(filename, encoding=file_encoding, errors='replace') as file:
                for line in file:
                    yield line
        else:
            text = input_text.get('1.0', 'end')
            for line in text.splitlines():
                yield line

    def write_output(lines: Iterator[List[str]]) -> None:
        """
        writes the output

        if the output file name is given, the full rows are written to it,
        otherwise the signed decimal coordinates and the remark of every
        row are written to the output text widget
        """
        output_text.configure(state='normal')
        try:
            filename = outfile_var.get()
            output_text.delete('1.0', 'end')
            if filename and not filename.isspace():
                with open(filename, encoding=file_encoding, mode='w') as file:
                    for line in lines:
                        print("\t".join(line), file=file)
            else:
                for line in lines:
                    # columns 5 and 6 are lat_dec and lon_dec, the last is Remark
                    output_text.insert('end', f"{line[5]}\t{line[6]}\t{line[-1]}")
                    output_text.insert('end', '\n')
        finally:
            # make the widget read-only again, even if the conversion failed
            output_text.configure(state='disabled')

    def browse_infile() -> None:
        """
        lets the user choose the input file and puts its path into the entry
        """
        newpath: Optional[str] = tkfiledialog.askopenfilename()
        if (newpath):
            try:
                newpath = os.path.relpath(newpath)
            except ValueError:
                # no relative path exists, e.g. on another Windows drive
                newpath = os.path.abspath(newpath)
            infile_var.set(newpath)

    def browse_outfile() -> None:
        """
        lets the user choose the output file and puts its path into the entry
        """
        newpath: Optional[str] = tkfiledialog.asksaveasfilename()
        if (newpath):
            try:
                newpath = os.path.relpath(newpath)
            except ValueError:
                # no relative path exists, e.g. on another Windows drive
                newpath = os.path.abspath(newpath)
            outfile_var.set(newpath)

    def process() -> None:
        """
        command for the Convert button
        """
        try:
            # catch all warnings
            with warnings.catch_warnings(record=True) as warns:
                write_output(process_simpl(input_lines()))
                # display the warnings generated during the conversion
                for w in warns:
                    tkinter.messagebox.showwarning("Warning", str(w.message))
            # notify the user that the conversion is finished
            tkinter.messagebox.showinfo(
                "Done.", "The processing has been completed")
        # show the ValueErrors and the errors of opening, reading and
        # writing the files (FileNotFoundError, PermissionError, ...)
        except (ValueError, OSError) as ex:
            tkinter.messagebox.showerror("Error", str(ex))

    def load() -> None:
        """
        loads the text from the input file into the input text widget
        """
        filename = infile_var.get()
        input_text.delete('1.0', 'end')
        if filename and not filename.isspace():
            # NOTE(review): the file is shown as UTF-8 here, but converted
            # with another encoding in input_lines - confirm this is intended
            with open(filename, encoding="utf8", errors='replace') as file:
                for line in file:
                    input_text.insert('end', line)

    # create buttons
    infile_btn = ttk.Button(mainframe, text="Browse", command=browse_infile)
    outfile_btn = ttk.Button(mainframe, text="Browse", command=browse_outfile)
    load_btn = ttk.Button(mainframe, text="Load", command=load)
    process_btn = ttk.Button(mainframe, text="Convert", command=process, style="ConvertButton.TButton")

    # display the widgets
    infile_lbl.grid(row=0, column=0, sticky='w')
    infile_entr.grid(row=1, column=0, sticky='we')
    infile_btn.grid(row=1, column=1, sticky='w')

    outfile_lbl.grid(row=0, column=3, sticky='w')
    outfile_entr.grid(row=1, column=3, sticky='we')
    outfile_btn.grid(row=1, column=4, sticky='w')

    load_btn.grid(row=2, column=0)
    process_btn.grid(row=2, column=2)

    ttk.Separator(mainframe, orient='horizontal').grid(row=3, column=0, columnspan=5, sticky='nsew', pady=20)

    input_frame.grid(row=4, column=0, columnspan=2)
    output_frame.grid(row=4, column=3, columnspan=2)

    ttk.Separator(root, orient='horizontal').grid(row=1, column=0, sticky='nsew')

    mainframe.grid(row=2, column=0, sticky='nsew')

    root.mainloop()


# Entry point; note that it also runs when the module is imported.
# Without '--cmd' the GUI is shown; with it, the lines of the standard
# input are converted and printed as tab-separated rows.
if '--cmd' not in sys.argv:
    launch_gui()
else:
    for fields in process_simpl(sys.stdin):
        print(*fields, sep='\t')