Mercurial > repos > perssond > ashlar
changeset 3:ef68bc2a4dbc draft default tip
planemo upload for repository https://github.com/ohsu-comp-bio/ashlar commit 69f200fcfa0b1d17de50466c51d8c5468fdeb54c
author | goeckslab |
---|---|
date | Fri, 09 Feb 2024 22:48:46 +0000 |
parents | 33ab2058c6d9 |
children | |
files | ashlar.xml macros.xml pyramid_upgrade.py |
diffstat | 3 files changed, 10 insertions(+), 593 deletions(-) [+] |
line wrap: on
line diff
--- a/ashlar.xml Tue Sep 20 17:33:50 2022 +0000 +++ b/ashlar.xml Fri Feb 09 22:48:46 2024 +0000 @@ -51,6 +51,10 @@ -c $adv.align_channel + #if $adv.stitch_alpha + --stitch-alpha $adv.stitch_alpha + #end if + #if $adv.filter_sigma --filter-sigma $adv.filter_sigma #end if @@ -73,20 +77,10 @@ #end for #end if - $adv.pyramid $adv.flip_mosaic_x $adv.flip_mosaic_y - -f registered.ome.tif; - - #if $upgrade.decide == "do_upgrade" - python3 '${__tool_directory__}/pyramid_upgrade.py' - registered.ome.tif - - #if $upgrade.markers_file - -n `echo \$(cat $upgrade.markers_file | tail -n +2 | awk -F, '{print \$3}')`; - #end if - #end if + -o registered.ome.tif; ]]></command> <inputs> @@ -96,24 +90,13 @@ <param name="flip_x" type="boolean" truevalue="--flip-x" falsevalue="" label="Flip X-axis"/> <param name="flip_y" type="boolean" truevalue="--flip-y" falsevalue="" label="Flip Y-axis"/> <param name="max_shift" type="integer" value="30" label="Maximum allowed per-tile corrective shift" help="In micros"/> - <conditional name="upgrade"> - <param name="decide" type="select" label="Upgrade to BF6-Compliant OME-TIFF Pyramid"> - <option value="do_upgrade">Upgrade Pyramid</option> - <option value="dont_upgrade">Leave Legacy Pyramid</option> - </param> - <when value="do_upgrade"> - <param name="markers_file" type="data" format="csv,tabular" optional="true" label="Markers File (optional)"/> - </when> - <when value="dont_upgrade"> - </when> - </conditional> <section name="adv" title="Advanced Options" expanded="false"> <param name="align_channel" type="integer" value="0" label="Align Channel Number"/> + <param name="stitch_alpha" type="float" optional="true" label="Alpha"/> <param name="filter_sigma" type="float" optional="true" label="Sigma"/> <param name="tile_size" type="integer" optional="true" label="Cyto Mask Channel"/> <param name="flip_mosaic_x" type="boolean" truevalue="--flip-mosaic-x" falsevalue="" label="Flip output image horizontally"/> <param name="flip_mosaic_y" type="boolean" truevalue="--flip-mosaic-y" falsevalue="" label="Flip output image vertically"/> - <param name="pyramid" type="boolean" checked="true" truevalue="--pyramid" falsevalue="" label="Write output as a single pyramidal TIFF"/> </section> </inputs> @@ -130,7 +113,7 @@ </param> <output name="output" ftype="ome.tiff"> <assert_contents> - <has_size value="4000000" delta="1000000" /> + <has_size value="500000" delta="400000" /> </assert_contents> </output> </test>
--- a/macros.xml Tue Sep 20 17:33:50 2022 +0000 +++ b/macros.xml Fri Feb 09 22:48:46 2024 +0000 @@ -12,11 +12,11 @@ </xml> <xml name="citations"> <citations> - <citation type="doi">10.1101/2021.04.20.440625</citation> + <citation type="doi">10.1093/bioinformatics/btac544</citation> </citations> </xml> - <token name="@TOOL_VERSION@">1.14.0</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@TOOL_VERSION@">1.18.0</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@CMD_BEGIN@">ashlar</token> </macros>
--- a/pyramid_upgrade.py Tue Sep 20 17:33:50 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,566 +0,0 @@ -import argparse -import dataclasses -import fractions -import io -import os -import re -import reprlib -import struct -import sys -import xml.etree.ElementTree -from typing import Any, List - - -datatype_formats = { - 1: "B", # BYTE - 2: "s", # ASCII - 3: "H", # SHORT - 4: "I", # LONG - 5: "I", # RATIONAL (pairs) - 6: "b", # SBYTE - 7: "B", # UNDEFINED - 8: "h", # SSHORT - 9: "i", # SLONG - 10: "i", # SRATIONAL (pairs) - 11: "f", # FLOAT - 12: "d", # DOUBLE - 13: "I", # IFD - 16: "Q", # LONG8 - 17: "q", # SLONG8 - 18: "Q", # IFD8 -} -rational_datatypes = {5, 10} - - -class TiffSurgeon: - """Read, manipulate and write IFDs in BigTIFF files.""" - - def __init__(self, path, *, writeable=False, encoding=None): - self.path = path - self.writeable = writeable - self.encoding = encoding - self.endian = "" - self.ifds = None - self.file = open(self.path, "r+b" if self.writeable else "rb") - self._validate() - - def _validate(self): - signature = self.read("2s") - signature = signature.decode("ascii", errors="ignore") - if signature == "II": - self.endian = "<" - elif signature == "MM": - self.endian = ">" - else: - raise FormatError(f"Not a TIFF file (signature is '{signature}').") - version = self.read("H") - if version == 42: - raise FormatError("Cannot process classic TIFF, only BigTIFF.") - offset_size, reserved, first_ifd_offset = self.read("H H Q") - if version != 43 or offset_size != 8 or reserved != 0: - raise FormatError("Malformed TIFF, giving up!") - self.first_ifd_offset = first_ifd_offset - - def read(self, fmt, *, file=None): - if file is None: - file = self.file - endian = self.endian or "=" - size = struct.calcsize(endian + fmt) - raw = file.read(size) - value = self.unpack(fmt, raw) - return value - - def write(self, fmt, *values): - if not self.writeable: - raise ValueError("File is opened as read-only.") - raw = self.pack(fmt, *values) - self.file.write(raw) - - def unpack(self, fmt, raw): - assert self.endian or re.match(r"\d+s", fmt), \ - "can't unpack non-string before endianness is detected" - fmt = self.endian + fmt - size = struct.calcsize(fmt) - values = struct.unpack(fmt, raw[:size]) - if len(values) == 1: - return values[0] - else: - return values - - def pack(self, fmt, *values): - assert self.endian, "can't pack without endian set" - fmt = self.endian + fmt - raw = struct.pack(fmt, *values) - return raw - - def read_ifds(self): - ifds = [self.read_ifd(self.first_ifd_offset)] - while ifds[-1].offset_next: - ifds.append(self.read_ifd(ifds[-1].offset_next)) - self.ifds = ifds - - def read_ifd(self, offset): - self.file.seek(offset) - num_tags = self.read("Q") - buf = io.BytesIO(self.file.read(num_tags * 20)) - offset_next = self.read("Q") - try: - tags = TagSet([self.read_tag(buf) for i in range(num_tags)]) - except FormatError as e: - raise FormatError(f"IFD at offset {offset}, {e}") from None - ifd = Ifd(tags, offset, offset_next) - return ifd - - def read_tag(self, buf): - tag = Tag(*self.read("H H Q 8s", file=buf)) - value, offset_range = self.tag_value(tag) - tag = dataclasses.replace(tag, value=value, offset_range=offset_range) - return tag - - def append_ifd_sequence(self, ifds): - """Write list of IFDs as a chained sequence at the end of the file. - - Returns a list of new Ifd objects with updated offsets. - - """ - self.file.seek(0, os.SEEK_END) - new_ifds = [] - for ifd in ifds: - offset = self.file.tell() - self.write("Q", len(ifd.tags)) - for tag in ifd.tags: - self.write_tag(tag) - offset_next = self.file.tell() + 8 if ifd is not ifds[-1] else 0 - self.write("Q", offset_next) - new_ifd = dataclasses.replace( - ifd, offset=offset, offset_next=offset_next - ) - new_ifds.append(new_ifd) - return new_ifds - - def append_tag_data(self, code, datatype, value): - """Build new tag and write data to the end of the file if necessary. - - Returns a Tag object corresponding to the passed parameters. This - function only writes any "overflow" data and not the IFD entry itself, - so the returned Tag must still be written to an IFD. - - If the value is small enough to fit in the data field within an IFD, no - data will actually be written to the file and the returned Tag object - will have the value encoded in its data attribute. Otherwise the data - will be appended to the file and the returned Tag's data attribute will - encode the corresponding offset. - - """ - fmt = datatype_formats[datatype] - # FIXME Should we perform our own check that values match datatype? - # struct.pack will do it but the exception won't be as understandable. - original_value = value - if isinstance(value, str): - if not self.encoding: - raise ValueError( - "ASCII tag values must be bytes if encoding is not set" - ) - value = [value.encode(self.encoding) + b"\x00"] - count = len(value[0]) - elif isinstance(value, bytes): - value = [value + b"\x00"] - count = len(value[0]) - else: - try: - len(value) - except TypeError: - value = [value] - count = len(value) - struct_count = count - if datatype in rational_datatypes: - value = [i for v in value for i in v.as_integer_ratio()] - count //= 2 - byte_count = struct_count * struct.calcsize(fmt) - if byte_count <= 8: - data = self.pack(str(struct_count) + fmt, *value) - data += bytes(8 - byte_count) - else: - self.file.seek(0, os.SEEK_END) - data = self.pack("Q", self.file.tell()) - self.write(str(count) + fmt, *value) - # TODO Compute and set offset_range. - tag = Tag(code, datatype, count, data, original_value) - return tag - - def write_first_ifd_offset(self, offset): - self.file.seek(8) - self.write("Q", offset) - - def write_tag(self, tag): - self.write("H H Q 8s", tag.code, tag.datatype, tag.count, tag.data) - - def tag_value(self, tag): - """Return decoded tag data and the file offset range.""" - fmt = datatype_formats[tag.datatype] - count = tag.count - if tag.datatype in rational_datatypes: - count *= 2 - byte_count = count * struct.calcsize(fmt) - if byte_count <= 8: - value = self.unpack(str(count) + fmt, tag.data) - offset_range = range(0, 0) - else: - offset = self.unpack("Q", tag.data) - self.file.seek(offset) - value = self.read(str(count) + fmt) - offset_range = range(offset, offset + byte_count) - if tag.datatype == 2: - value = value.rstrip(b"\x00") - if self.encoding: - try: - value = value.decode(self.encoding) - except UnicodeDecodeError as e: - raise FormatError(f"tag {tag.code}: {e}") from None - elif tag.datatype in rational_datatypes: - value = [ - fractions.Fraction(*v) for v in zip(value[::2], value[1::2]) - ] - if len(value) == 1: - value = value[0] - return value, offset_range - - def close(self): - self.file.close() - - -@dataclasses.dataclass(frozen=True) -class Tag: - code: int - datatype: int - count: int - data: bytes - value: Any = None - offset_range: range = None - - _vrepr = reprlib.Repr() - _vrepr.maxstring = 60 - _vrepr.maxother = 60 - vrepr = _vrepr.repr - - def __repr__(self): - return ( - self.__class__.__qualname__ + "(" - + f"code={self.code!r}, datatype={self.datatype!r}, " - + f"count={self.count!r}, data={self.data!r}, " - + f"value={self.vrepr(self.value)}" - + ")" - ) - - -@dataclasses.dataclass(frozen=True) -class TagSet: - """Container for Tag objects as stored in a TIFF IFD. - - Tag objects are maintained in a list that's always sorted in ascending order - by the tag code. Only one tag for a given code may be present, which is where - the "set" name comes from. - - """ - - tags: List[Tag] = dataclasses.field(default_factory=list) - - def __post_init__(self): - if len(self.codes) != len(set(self.codes)): - raise ValueError("Duplicate tag codes are not allowed.") - - def __repr__(self): - ret = type(self).__name__ + "([" - if self.tags: - ret += "\n" - ret += "".join([f" {t},\n" for t in self.tags]) - ret += "])" - return ret - - @property - def codes(self): - return [t.code for t in self.tags] - - def __getitem__(self, code): - for t in self.tags: - if code == t.code: - return t - else: - raise KeyError(code) - - def __delitem__(self, code): - try: - i = self.codes.index(code) - except ValueError: - raise KeyError(code) from None - self.tags[:] = self.tags[:i] + self.tags[i + 1:] - - def __contains__(self, code): - return code in self.codes - - def __len__(self): - return len(self.tags) - - def __iter__(self): - return iter(self.tags) - - def get(self, code, default=None): - try: - return self[code] - except KeyError: - return default - - def get_value(self, code, default=None): - tag = self.get(code) - if tag: - return tag.value - else: - return default - - def insert(self, tag): - """Add a new tag or replace an existing one.""" - for i, t in enumerate(self.tags): - if tag.code == t.code: - self.tags[i] = tag - return - elif tag.code < t.code: - break - else: - i = len(self.tags) - n = len(self.tags) - self.tags[i:n + 1] = [tag] + self.tags[i:n] - - -@dataclasses.dataclass(frozen=True) -class Ifd: - tags: TagSet - offset: int - offset_next: int - - @property - def nbytes(self): - return len(self.tags) * 20 + 16 - - @property - def offset_range(self): - return range(self.offset, self.offset + self.nbytes) - - -class FormatError(Exception): - pass - - -def fix_attrib_namespace(elt): - """Prefix un-namespaced XML attributes with the tag's namespace.""" - # This fixes ElementTree's inability to round-trip XML with a default - # namespace ("cannot use non-qualified names with default_namespace option" - # error). 7-year-old BPO issue here: https://bugs.python.org/issue17088 - # Code inspired by https://gist.github.com/provegard/1381912 . - if elt.tag[0] == "{": - uri, _ = elt.tag[1:].rsplit("}", 1) - new_attrib = {} - for name, value in elt.attrib.items(): - if name[0] != "{": - # For un-namespaced attributes, copy namespace from element. - name = f"{{{uri}}}{name}" - new_attrib[name] = value - elt.attrib = new_attrib - for child in elt: - fix_attrib_namespace(child) - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Convert an OME-TIFF legacy pyramid to the BioFormats 6" - " OME-TIFF pyramid format in-place.", - ) - parser.add_argument("image", help="OME-TIFF file to convert") - parser.add_argument( - "-n", - dest="channel_names", - nargs="+", - default=[], - metavar="NAME", - help="Channel names to be inserted into OME metadata. Number of names" - " must match number of channels in image. Be sure to put quotes" - " around names containing spaces or other special shell characters." - ) - args = parser.parse_args() - return args - - -def main(): - - args = parse_args() - - image_path = sys.argv[1] - try: - tiff = TiffSurgeon(image_path, encoding="utf-8", writeable=True) - except FormatError as e: - print(f"TIFF format error: {e}") - sys.exit(1) - - tiff.read_ifds() - - # ElementTree doesn't parse xml declarations so we'll just run some sanity - # checks that we do have UTF-8 and give it a decoded string instead of raw - # bytes. We need to both ensure that the raw tag bytes decode properly and - # that the declaration encoding is UTF-8 if present. - try: - omexml = tiff.ifds[0].tags.get_value(270, "") - except FormatError: - print("ImageDescription tag is not a valid UTF-8 string (not an OME-TIFF?)") - sys.exit(1) - if re.match(r'<\?xml [^>]*encoding="(?!UTF-8)[^"]*"', omexml): - print("OME-XML is encoded with something other than UTF-8.") - sys.exit(1) - - xml_ns = {"ome": "http://www.openmicroscopy.org/Schemas/OME/2016-06"} - - if xml_ns["ome"] not in omexml: - print("Not an OME-TIFF.") - sys.exit(1) - if ( - "Faas" not in tiff.ifds[0].tags.get_value(305, "") - or 330 in tiff.ifds[0].tags - ): - print("Not a legacy OME-TIFF pyramid.") - sys.exit(1) - - # All XML manipulation assumes the document is valid OME-XML! - root = xml.etree.ElementTree.fromstring(omexml) - image = root.find("ome:Image", xml_ns) - pixels = image.find("ome:Pixels", xml_ns) - size_x = int(pixels.get("SizeX")) - size_y = int(pixels.get("SizeY")) - size_c = int(pixels.get("SizeC")) - size_z = int(pixels.get("SizeZ")) - size_t = int(pixels.get("SizeT")) - num_levels = len(root.findall("ome:Image", xml_ns)) - page_dims = [(ifd.tags[256].value, ifd.tags[257].value) for ifd in tiff.ifds] - - if len(root) != num_levels: - print("Top-level OME-XML elements other than Image are not supported.") - if size_z != 1 or size_t != 1: - print("Z-stacks and multiple timepoints are not supported.") - sys.exit(1) - if size_c * num_levels != len(tiff.ifds): - print("TIFF page count does not match OME-XML Image elements.") - sys.exit(1) - if any(dims != (size_x, size_y) for dims in page_dims[:size_c]): - print(f"TIFF does not begin with SizeC={size_c} full-size pages.") - sys.exit(1) - for level in range(1, num_levels): - level_dims = page_dims[level * size_c: (level + 1) * size_c] - if len(set(level_dims)) != 1: - print( - f"Pyramid level {level + 1} out of {num_levels} has inconsistent" - f" sizes:\n{level_dims}" - ) - sys.exit(1) - if args.channel_names and len(args.channel_names) != size_c: - print( - f"Wrong number of channel names -- image has {size_c} channels but" - f" {len(args.channel_names)} names were specified:" - ) - for i, n in enumerate(args.channel_names, 1): - print(f"{i:4}: {n}") - sys.exit(1) - - print("Input image summary") - print("===================") - print(f"Dimensions: {size_x} x {size_y}") - print(f"Number of channels: {size_c}") - print(f"Pyramid sub-resolutions ({num_levels - 1} total):") - for dim_x, dim_y in page_dims[size_c::size_c]: - print(f" {dim_x} x {dim_y}") - software = tiff.ifds[0].tags.get_value(305, "<not set>") - print(f"Software: {software}") - print() - - print("Updating OME-XML metadata...") - # We already verified there is nothing but Image elements under the root. - for other_image in root[1:]: - root.remove(other_image) - for tiffdata in pixels.findall("ome:TiffData", xml_ns): - pixels.remove(tiffdata) - new_tiffdata = xml.etree.ElementTree.Element( - f"{{{xml_ns['ome']}}}TiffData", - attrib={"IFD": "0", "PlaneCount": str(size_c)}, - ) - # A valid OME-XML Pixels begins with size_c Channels; then comes TiffData. - pixels.insert(size_c, new_tiffdata) - - if args.channel_names: - print("Renaming channels...") - channels = pixels.findall("ome:Channel", xml_ns) - for channel, name in zip(channels, args.channel_names): - channel.attrib["Name"] = name - - fix_attrib_namespace(root) - # ElementTree.tostring would have been simpler but it only supports - # xml_declaration and default_namespace starting with Python 3.8. - xml_file = io.BytesIO() - tree = xml.etree.ElementTree.ElementTree(root) - tree.write( - xml_file, - encoding="utf-8", - xml_declaration=True, - default_namespace=xml_ns["ome"], - ) - new_omexml = xml_file.getvalue() - - print("Writing new TIFF headers...") - stale_ranges = [ifd.offset_range for ifd in tiff.ifds] - main_ifds = tiff.ifds[:size_c] - channel_sub_ifds = [tiff.ifds[c + size_c::size_c] for c in range(size_c)] - for i, (main_ifd, sub_ifds) in enumerate(zip(main_ifds, channel_sub_ifds)): - for ifd in sub_ifds: - if 305 in ifd.tags: - stale_ranges.append(ifd.tags[305].offset_range) - del ifd.tags[305] - ifd.tags.insert(tiff.append_tag_data(254, 3, 1)) - if i == 0: - stale_ranges.append(main_ifd.tags[305].offset_range) - stale_ranges.append(main_ifd.tags[270].offset_range) - old_software = main_ifd.tags[305].value.replace("Faas", "F*a*a*s") - new_software = f"pyramid_upgrade.py (was {old_software})" - main_ifd.tags.insert(tiff.append_tag_data(305, 2, new_software)) - main_ifd.tags.insert(tiff.append_tag_data(270, 2, new_omexml)) - else: - if 305 in main_ifd.tags: - stale_ranges.append(main_ifd.tags[305].offset_range) - del main_ifd.tags[305] - sub_ifds[:] = tiff.append_ifd_sequence(sub_ifds) - offsets = [ifd.offset for ifd in sub_ifds] - main_ifd.tags.insert(tiff.append_tag_data(330, 16, offsets)) - main_ifds = tiff.append_ifd_sequence(main_ifds) - tiff.write_first_ifd_offset(main_ifds[0].offset) - - print("Clearing old headers and tag values...") - # We overwrite all the old IFDs and referenced data values with obvious - # "filler" as a courtesy to anyone who might need to poke around in the TIFF - # structure down the road. A real TIFF parser wouldn't see the stale data, - # but a human might just scan for the first thing that looks like a run of - # OME-XML and not realize it's been replaced with something else. The filler - # content is the repeated string "unused " with square brackets at the - # beginning and end of each filled IFD or data value. - filler = b"unused " - f_len = len(filler) - for r in stale_ranges: - tiff.file.seek(r.start) - tiff.file.write(b"[") - f_total = len(r) - 2 - for i in range(f_total // f_len): - tiff.file.write(filler) - tiff.file.write(b" " * (f_total % f_len)) - tiff.file.write(b"]") - - tiff.close() - - print() - print("Success!") - - -if __name__ == "__main__": - main()