# tifffile.py # Copyright (c) 2008-2024, Christoph Gohlke # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # 3. Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. r"""Read and write TIFF files. Tifffile is a Python library to (1) store NumPy arrays in TIFF (Tagged Image File Format) files, and (2) read image and metadata from TIFF-like files used in bioimaging. 
Image and metadata can be read from TIFF, BigTIFF, OME-TIFF, DNG, STK, LSM, SGI, NIHImage, ImageJ, MMStack, NDTiff, FluoView, ScanImage, SEQ, GEL, SVS, SCN, SIS, BIF, ZIF (Zoomable Image File Format), QPTIFF (QPI, PKI), NDPI, Philips DP, and GeoTIFF formatted files. Image data can be read as NumPy arrays or Zarr arrays/groups from strips, tiles, pages (IFDs), SubIFDs, higher order series, and pyramidal levels. Image data can be written to TIFF, BigTIFF, OME-TIFF, and ImageJ hyperstack compatible files in multi-page, volumetric, pyramidal, memory-mappable, tiled, predicted, or compressed form. Many compression and predictor schemes are supported via the imagecodecs library, including LZW, PackBits, Deflate, PIXTIFF, LZMA, LERC, Zstd, JPEG (8 and 12-bit, lossless), JPEG 2000, JPEG XR, JPEG XL, WebP, PNG, EER, Jetraw, 24-bit floating-point, and horizontal differencing. Tifffile can also be used to inspect TIFF structures, read image data from multi-dimensional file sequences, write fsspec ReferenceFileSystem for TIFF files and image file sequences, patch TIFF tag values, and parse many proprietary metadata formats. :Author: `Christoph Gohlke <https://www.cgohlke.com>`_ :License: BSD 3-Clause :Version: 2024.5.22 :DOI: `10.5281/zenodo.6795860 <https://doi.org/10.5281/zenodo.6795860>`_ Quickstart ---------- Install the tifffile package and all dependencies from the `Python Package Index <https://pypi.org/project/tifffile/>`_:: python -m pip install -U tifffile[all] Tifffile is also available in other package repositories such as Anaconda, Debian, and MSYS2. The tifffile library is type annotated and documented via docstrings:: python -c "import tifffile; help(tifffile)" Tifffile can be used as a console script to inspect and preview TIFF files:: python -m tifffile --help See `Examples`_ for using the programming interface. Source code and support are available on `GitHub <https://github.com/cgohlke/tifffile>`_. Support is also provided on the `image.sc <https://forum.image.sc/tag/tifffile>`_ forum. 
Requirements ------------ This revision was tested with the following requirements and dependencies (other versions may work): - `CPython `_ 3.9.13, 3.10.11, 3.11.9, 3.12.3, 64-bit - `NumPy `_ 1.26.4 - `Imagecodecs `_ 2024.1.1 (required for encoding or decoding LZW, JPEG, etc. compressed segments) - `Matplotlib `_ 3.8.4 (required for plotting) - `Lxml `_ 5.2.2 (required only for validating and printing XML) - `Zarr `_ 2.18.1 (required only for opening Zarr stores) - `Fsspec `_ 2024.5.0 (required only for opening ReferenceFileSystem files) Revisions --------- 2024.5.22 - Pass 5084 tests. - Derive TiffPages, TiffPageSeries, FileSequence, StoredShape from Sequence. - Truncate circular IFD chain, do not raise TiffFileError (breaking). - Deprecate access to TiffPages.pages and FileSequence.files. - Enable DeprecationWarning for enums in TIFF namespace. - Remove some deprecated code (breaking). - Add iccprofile property to TiffPage and parameter to TiffWriter.write. - Do not detect VSI as SIS format. - Limit length of logged exception messages. - Work around GitHub not rendering docstring examples (#254, #255). 2024.5.10 - Support reading JPEGXL compression in DNG 1.7. - Read invalid TIFF created by IDEAS software. 2024.5.3 - Fix reading incompletely written LSM. - Fix reading Philips DP with extra rows of tiles (#253, breaking). 2024.4.24 - Fix compatibility issue with numpy 2 (#252). 2024.4.18 - Fix write_fsspec when last row of tiles is missing in Philips slide (#249). - Add option not to quote file names in write_fsspec. - Allow compress bilevel images with deflate, LZMA, and Zstd. 2024.2.12 - Deprecate dtype, add chunkdtype parameter in FileSequence.asarray. - Add imreadargs parameters passed to FileSequence.imread. 2024.1.30 - Fix compatibility issue with numpy 2 (#238). - Enable DeprecationWarning for tuple compression argument. - Parse sequence of numbers in xml2dict. 2023.12.9 - Read 32-bit Indica Labs TIFF as float32. 
- Fix UnboundLocalError reading big LSM files without time axis. - Use os.sched_getaffinity, if available, to get the number of CPUs (#231). - Limit the number of default worker threads to 32. 2023.9.26 - Lazily convert dask array to ndarray when writing. - Allow to specify buffersize for reading and writing. - Fix IndexError reading some corrupted files with ZarrTiffStore (#227). 2023.9.18 - Raise exception when writing non-volume data with volumetric tiles (#225). - Improve multi-threaded writing of compressed multi-page files. - Fix fsspec reference for big-endian files with predictors. 2023.8.30 - Support exclusive file creation mode (#221, #223). 2023.8.25 - Verify shaped metadata is compatible with page shape. - Support out parameter when returning selection from imread (#222). 2023.8.12 - Support decompressing EER frames. - Facilitate filtering logged warnings (#216). - Read more tags from UIC1Tag (#217). - Fix premature closing of files in main (#218). - Don't force matplotlib backend to tkagg in main (#219). - Add py.typed marker. - Drop support for imagecodecs < 2023.3.16. 2023.7.18 - Limit threading via TIFFFILE_NUM_THREADS environment variable (#215). - Remove maxworkers parameter from tiff2fsspec (breaking). 2023.7.10 - Increase default strip size to 256 KB when writing with compression. - Fix ZarrTiffStore with non-default chunkmode. 2023.7.4 - Add option to return selection from imread (#200). - Fix reading OME series with missing trailing frames (#199). - Fix fsspec reference for WebP compressed segments missing alpha channel. - Fix linting issues. - Detect files written by Agilent Technologies. - Drop support for Python 3.8 and numpy < 1.21 (NEP29). 2023.4.12 - Do not write duplicate ImageDescription tags from extratags (breaking). - Support multifocal SVS files (#193). - Log warning when filtering out extratags. - Fix writing OME-TIFF with image description in extratags. - Ignore invalid predictor tag value if prediction is not used. 
- Raise KeyError if ZarrStore is missing requested chunk. 2023.3.21 - … Refer to the CHANGES file for older revisions. Notes ----- TIFF, the Tagged Image File Format, was created by the Aldus Corporation and Adobe Systems Incorporated. STK, LSM, FluoView, SGI, SEQ, GEL, QPTIFF, NDPI, SCN, SVS, ZIF, BIF, and OME-TIFF, are custom extensions defined by Molecular Devices (Universal Imaging Corporation), Carl Zeiss MicroImaging, Olympus, Silicon Graphics International, Media Cybernetics, Molecular Dynamics, PerkinElmer, Hamamatsu, Leica, ObjectivePathology, Roche Digital Pathology, and the Open Microscopy Environment consortium, respectively. Tifffile supports a subset of the TIFF6 specification, mainly 8, 16, 32, and 64-bit integer, 16, 32 and 64-bit float, grayscale and multi-sample images. Specifically, CCITT and OJPEG compression, chroma subsampling without JPEG compression, color space transformations, samples with differing types, or IPTC, ICC, and XMP metadata are not implemented. Besides classic TIFF, tifffile supports several TIFF-like formats that do not strictly adhere to the TIFF6 specification. Some formats allow file and data sizes to exceed the 4 GB limit of the classic TIFF: - **BigTIFF** is identified by version number 43 and uses different file header, IFD, and tag structures with 64-bit offsets. The format also adds 64-bit data types. Tifffile can read and write BigTIFF files. - **ImageJ hyperstacks** store all image data, which may exceed 4 GB, contiguously after the first IFD. Files > 4 GB contain one IFD only. The size and shape of the up to 6-dimensional image data can be determined from the ImageDescription tag of the first IFD, which is Latin-1 encoded. Tifffile can read and write ImageJ hyperstacks. - **OME-TIFF** files store up to 8-dimensional image data in one or multiple TIFF or BigTIFF files. 
The UTF-8 encoded OME-XML metadata found in the ImageDescription tag of the first IFD defines the position of TIFF IFDs in the high dimensional image data. Tifffile can read OME-TIFF files (except multi-file pyramidal) and write NumPy arrays to single-file OME-TIFF. - **Micro-Manager NDTiff** stores multi-dimensional image data in one or more classic TIFF files. Metadata contained in a separate NDTiff.index binary file defines the position of the TIFF IFDs in the image array. Each TIFF file also contains metadata in a non-TIFF binary structure at offset 8. Downsampled image data of pyramidal datasets are stored in separate folders. Tifffile can read NDTiff files. Version 0 and 1 series, tiling, stitching, and multi-resolution pyramids are not supported. - **Micro-Manager MMStack** stores 6-dimensional image data in one or more classic TIFF files. Metadata contained in non-TIFF binary structures and JSON strings define the image stack dimensions and the position of the image frame data in the file and the image stack. The TIFF structures and metadata are often corrupted or wrong. Tifffile can read MMStack files. - **Carl Zeiss LSM** files store all IFDs below 4 GB and wrap around 32-bit StripOffsets pointing to image data above 4 GB. The StripOffsets of each series and position require separate unwrapping. The StripByteCounts tag contains the number of bytes for the uncompressed data. Tifffile can read LSM files of any size. - **MetaMorph Stack, STK** files contain additional image planes stored contiguously after the image data of the first page. The total number of planes is equal to the count of the UIC2tag. Tifffile can read STK files. - **ZIF**, the Zoomable Image File format, is a subspecification of BigTIFF with SGI's ImageDepth extension and additional compression schemes. Only little-endian, tiled, interleaved, 8-bit per sample images with JPEG, PNG, JPEG XR, and JPEG 2000 compression are allowed. Tifffile can read and write ZIF files. 
- **Hamamatsu NDPI** files use some 64-bit offsets in the file header, IFD, and tag structures. Single, LONG typed tag values can exceed 32-bit. The high bytes of 64-bit tag values and offsets are stored after IFD structures. Tifffile can read NDPI files > 4 GB. JPEG compressed segments with dimensions >65530 or missing restart markers cannot be decoded with common JPEG libraries. Tifffile works around this limitation by separately decoding the MCUs between restart markers, which performs poorly. BitsPerSample, SamplesPerPixel, and PhotometricInterpretation tags may contain wrong values, which can be corrected using the value of tag 65441. - **Philips TIFF** slides store padded ImageWidth and ImageLength tag values for tiled pages. The values can be corrected using the DICOM_PIXEL_SPACING attributes of the XML formatted description of the first page. Tile offsets and byte counts may be 0. Tifffile can read Philips slides. - **Ventana/Roche BIF** slides store tiles and metadata in a BigTIFF container. Tiles may overlap and require stitching based on the TileJointInfo elements in the XMP tag. Volumetric scans are stored using the ImageDepth extension. Tifffile can read BIF and decode individual tiles but does not perform stitching. - **ScanImage** optionally allows corrupted non-BigTIFF files > 2 GB. The values of StripOffsets and StripByteCounts can be recovered using the constant differences of the offsets of IFD and tag values throughout the file. Tifffile can read such files if the image data are stored contiguously in each page. - **GeoTIFF sparse** files allow strip or tile offsets and byte counts to be 0. Such segments are implicitly set to 0 or the NODATA value on reading. Tifffile can read GeoTIFF sparse files. - **Tifffile shaped** files store the array shape and user-provided metadata of multi-dimensional image series in JSON format in the ImageDescription tag of the first page of the series. 
The format allows for multiple series, SubIFDs, sparse segments with zero offset and byte count, and truncated series, where only the first page of a series is present, and the image data are stored contiguously. No other software besides Tifffile supports the truncated format. Other libraries for reading, writing, inspecting, or manipulating scientific TIFF files from Python are `aicsimageio `_, `apeer-ometiff-library `_, `bigtiff `_, `fabio.TiffIO `_, `GDAL `_, `imread `_, `large_image `_, `openslide-python `_, `opentile `_, `pylibtiff `_, `pylsm `_, `pymimage `_, `python-bioformats `_, `pytiff `_, `scanimagetiffreader-python `_, `SimpleITK `_, `slideio `_, `tiffslide `_, `tifftools `_, `tyf `_, `xtiff `_, and `ndtiff `_. References ---------- - TIFF 6.0 Specification and Supplements. Adobe Systems Incorporated. https://www.adobe.io/open/standards/TIFF.html - TIFF File Format FAQ. https://www.awaresystems.be/imaging/tiff/faq.html - The BigTIFF File Format. https://www.awaresystems.be/imaging/tiff/bigtiff.html - MetaMorph Stack (STK) Image File Format. http://mdc.custhelp.com/app/answers/detail/a_id/18862 - Image File Format Description LSM 5/7 Release 6.0 (ZEN 2010). Carl Zeiss MicroImaging GmbH. BioSciences. May 10, 2011 - The OME-TIFF format. https://docs.openmicroscopy.org/ome-model/latest/ - UltraQuant(r) Version 6.0 for Windows Start-Up Guide. http://www.ultralum.com/images%20ultralum/pdf/UQStart%20Up%20Guide.pdf - Micro-Manager File Formats. https://micro-manager.org/wiki/Micro-Manager_File_Formats - ScanImage BigTiff Specification. https://docs.scanimage.org/Appendix/ScanImage+BigTiff+Specification.html - ZIF, the Zoomable Image File format. https://zif.photo/ - GeoTIFF File Format https://gdal.org/drivers/raster/gtiff.html - Cloud optimized GeoTIFF. https://github.com/cogeotiff/cog-spec/blob/master/spec.md - Tags for TIFF and Related Specifications. Digital Preservation. 
https://www.loc.gov/preservation/digital/formats/content/tiff_tags.shtml - CIPA DC-008-2016: Exchangeable image file format for digital still cameras: Exif Version 2.31. http://www.cipa.jp/std/documents/e/DC-008-Translation-2016-E.pdf - The EER (Electron Event Representation) file format. https://github.com/fei-company/EerReaderLib - Digital Negative (DNG) Specification. Version 1.7.1.0, September 2023. https://helpx.adobe.com/content/dam/help/en/photoshop/pdf/DNG_Spec_1_7_1_0.pdf - Roche Digital Pathology. BIF image file format for digital pathology. https://diagnostics.roche.com/content/dam/diagnostics/Blueprint/en/pdf/rmd/Roche-Digital-Pathology-BIF-Whitepaper.pdf - Astro-TIFF specification. https://astro-tiff.sourceforge.io/ - Aperio Technologies, Inc. Digital Slides and Third-Party Data Interchange. Aperio_Digital_Slides_and_Third-party_data_interchange.pdf - PerkinElmer image format. https://downloads.openmicroscopy.org/images/Vectra-QPTIFF/perkinelmer/PKI_Image%20Format.docx - NDTiffStorage. 
https://github.com/micro-manager/NDTiffStorage Examples -------- Write a NumPy array to a single-page RGB TIFF file: >>> data = numpy.random.randint(0, 255, (256, 256, 3), 'uint8') >>> imwrite('temp.tif', data, photometric='rgb') Read the image from the TIFF file as NumPy array: >>> image = imread('temp.tif') >>> image.shape (256, 256, 3) Use the `photometric` and `planarconfig` arguments to write a 3x3x3 NumPy array to an interleaved RGB, a planar RGB, or a 3-page grayscale TIFF: >>> data = numpy.random.randint(0, 255, (3, 3, 3), 'uint8') >>> imwrite('temp.tif', data, photometric='rgb') >>> imwrite('temp.tif', data, photometric='rgb', planarconfig='separate') >>> imwrite('temp.tif', data, photometric='minisblack') Use the `extrasamples` argument to specify how extra components are interpreted, for example, for an RGBA image with unassociated alpha channel: >>> data = numpy.random.randint(0, 255, (256, 256, 4), 'uint8') >>> imwrite('temp.tif', data, photometric='rgb', extrasamples=['unassalpha']) Write a 3-dimensional NumPy array to a multi-page, 16-bit grayscale TIFF file: >>> data = numpy.random.randint(0, 2**12, (64, 301, 219), 'uint16') >>> imwrite('temp.tif', data, photometric='minisblack') Read the whole image stack from the multi-page TIFF file as NumPy array: >>> image_stack = imread('temp.tif') >>> image_stack.shape (64, 301, 219) >>> image_stack.dtype dtype('uint16') Read the image from the first page in the TIFF file as NumPy array: >>> image = imread('temp.tif', key=0) >>> image.shape (301, 219) Read images from a selected range of pages: >>> images = imread('temp.tif', key=range(4, 40, 2)) >>> images.shape (18, 301, 219) Iterate over all pages in the TIFF file and successively read images: >>> with TiffFile('temp.tif') as tif: ... for page in tif.pages: ... image = page.asarray() ... 
Get information about the image stack in the TIFF file without reading any image data: >>> tif = TiffFile('temp.tif') >>> len(tif.pages) # number of pages in the file 64 >>> page = tif.pages[0] # get shape and dtype of image in first page >>> page.shape (301, 219) >>> page.dtype dtype('uint16') >>> page.axes 'YX' >>> series = tif.series[0] # get shape and dtype of first image series >>> series.shape (64, 301, 219) >>> series.dtype dtype('uint16') >>> series.axes 'QYX' >>> tif.close() Inspect the "XResolution" tag from the first page in the TIFF file: >>> with TiffFile('temp.tif') as tif: ... tag = tif.pages[0].tags['XResolution'] ... >>> tag.value (1, 1) >>> tag.name 'XResolution' >>> tag.code 282 >>> tag.count 1 >>> tag.dtype Iterate over all tags in the TIFF file: >>> with TiffFile('temp.tif') as tif: ... for page in tif.pages: ... for tag in page.tags: ... tag_name, tag_value = tag.name, tag.value ... Overwrite the value of an existing tag, for example, XResolution: >>> with TiffFile('temp.tif', mode='r+') as tif: ... _ = tif.pages[0].tags['XResolution'].overwrite((96000, 1000)) ... Write a 5-dimensional floating-point array using BigTIFF format, separate color components, tiling, Zlib compression level 8, horizontal differencing predictor, and additional metadata: >>> data = numpy.random.rand(2, 5, 3, 301, 219).astype('float32') >>> imwrite( ... 'temp.tif', ... data, ... bigtiff=True, ... photometric='rgb', ... planarconfig='separate', ... tile=(32, 32), ... compression='zlib', ... compressionargs={'level': 8}, ... predictor=True, ... metadata={'axes': 'TZCYX'}, ... ) Write a 10 fps time series of volumes with xyz voxel size 2.6755x2.6755x3.9474 micron^3 to an ImageJ hyperstack formatted TIFF file: >>> volume = numpy.random.randn(6, 57, 256, 256).astype('float32') >>> image_labels = [f'{i}' for i in range(volume.shape[0] * volume.shape[1])] >>> imwrite( ... 'temp.tif', ... volume, ... imagej=True, ... resolution=(1.0 / 2.6755, 1.0 / 2.6755), ... metadata={ ... 
'spacing': 3.947368, ... 'unit': 'um', ... 'finterval': 1 / 10, ... 'fps': 10.0, ... 'axes': 'TZYX', ... 'Labels': image_labels, ... }, ... ) Read the volume and metadata from the ImageJ hyperstack file: >>> with TiffFile('temp.tif') as tif: ... volume = tif.asarray() ... axes = tif.series[0].axes ... imagej_metadata = tif.imagej_metadata ... >>> volume.shape (6, 57, 256, 256) >>> axes 'TZYX' >>> imagej_metadata['slices'] 57 >>> imagej_metadata['frames'] 6 Memory-map the contiguous image data in the ImageJ hyperstack file: >>> memmap_volume = memmap('temp.tif') >>> memmap_volume.shape (6, 57, 256, 256) >>> del memmap_volume Create a TIFF file containing an empty image and write to the memory-mapped NumPy array (note: this does not work with compression or tiling): >>> memmap_image = memmap( ... 'temp.tif', shape=(256, 256, 3), dtype='float32', photometric='rgb' ... ) >>> type(memmap_image) >>> memmap_image[255, 255, 1] = 1.0 >>> memmap_image.flush() >>> del memmap_image Write two NumPy arrays to a multi-series TIFF file (note: other TIFF readers will not recognize the two series; use the OME-TIFF format for better interoperability): >>> series0 = numpy.random.randint(0, 255, (32, 32, 3), 'uint8') >>> series1 = numpy.random.randint(0, 255, (4, 256, 256), 'uint16') >>> with TiffWriter('temp.tif') as tif: ... tif.write(series0, photometric='rgb') ... tif.write(series1, photometric='minisblack') ... Read the second image series from the TIFF file: >>> series1 = imread('temp.tif', series=1) >>> series1.shape (4, 256, 256) Successively write the frames of one contiguous series to a TIFF file: >>> data = numpy.random.randint(0, 255, (30, 301, 219), 'uint8') >>> with TiffWriter('temp.tif') as tif: ... for frame in data: ... tif.write(frame, contiguous=True) ... 
Append an image series to the existing TIFF file (note: this does not work with ImageJ hyperstack or OME-TIFF files): >>> data = numpy.random.randint(0, 255, (301, 219, 3), 'uint8') >>> imwrite('temp.tif', data, photometric='rgb', append=True) Create a TIFF file from a generator of tiles: >>> data = numpy.random.randint(0, 2**12, (31, 33, 3), 'uint16') >>> def tiles(data, tileshape): ... for y in range(0, data.shape[0], tileshape[0]): ... for x in range(0, data.shape[1], tileshape[1]): ... yield data[y : y + tileshape[0], x : x + tileshape[1]] ... >>> imwrite( ... 'temp.tif', ... tiles(data, (16, 16)), ... tile=(16, 16), ... shape=data.shape, ... dtype=data.dtype, ... photometric='rgb', ... ) Write a multi-dimensional, multi-resolution (pyramidal), multi-series OME-TIFF file with metadata. Sub-resolution images are written to SubIFDs. Limit parallel encoding to 2 threads. Write a thumbnail image as a separate image series: >>> data = numpy.random.randint(0, 255, (8, 2, 512, 512, 3), 'uint8') >>> subresolutions = 2 >>> pixelsize = 0.29 # micrometer >>> with TiffWriter('temp.ome.tif', bigtiff=True) as tif: ... metadata = { ... 'axes': 'TCYXS', ... 'SignificantBits': 8, ... 'TimeIncrement': 0.1, ... 'TimeIncrementUnit': 's', ... 'PhysicalSizeX': pixelsize, ... 'PhysicalSizeXUnit': 'µm', ... 'PhysicalSizeY': pixelsize, ... 'PhysicalSizeYUnit': 'µm', ... 'Channel': {'Name': ['Channel 1', 'Channel 2']}, ... 'Plane': {'PositionX': [0.0] * 16, 'PositionXUnit': ['µm'] * 16}, ... } ... options = dict( ... photometric='rgb', ... tile=(128, 128), ... compression='jpeg', ... resolutionunit='CENTIMETER', ... maxworkers=2, ... ) ... tif.write( ... data, ... subifds=subresolutions, ... resolution=(1e4 / pixelsize, 1e4 / pixelsize), ... metadata=metadata, ... **options, ... ) ... # write pyramid levels to the two subifds ... # in production use resampling to generate sub-resolution images ... for level in range(subresolutions): ... mag = 2 ** (level + 1) ... tif.write( ... 
data[..., ::mag, ::mag, :], ... subfiletype=1, ... resolution=(1e4 / mag / pixelsize, 1e4 / mag / pixelsize), ... **options, ... ) ... # add a thumbnail image as a separate series ... # it is recognized by QuPath as an associated image ... thumbnail = (data[0, 0, ::8, ::8] >> 2).astype('uint8') ... tif.write(thumbnail, metadata={'Name': 'thumbnail'}) ... Access the image levels in the pyramidal OME-TIFF file: >>> baseimage = imread('temp.ome.tif') >>> second_level = imread('temp.ome.tif', series=0, level=1) >>> with TiffFile('temp.ome.tif') as tif: ... baseimage = tif.series[0].asarray() ... second_level = tif.series[0].levels[1].asarray() ... number_levels = len(tif.series[0].levels) # includes base level ... Iterate over and decode single JPEG compressed tiles in the TIFF file: >>> with TiffFile('temp.ome.tif') as tif: ... fh = tif.filehandle ... for page in tif.pages: ... for index, (offset, bytecount) in enumerate( ... zip(page.dataoffsets, page.databytecounts) ... ): ... _ = fh.seek(offset) ... data = fh.read(bytecount) ... tile, indices, shape = page.decode( ... data, index, jpegtables=page.jpegtables ... ) ... Use Zarr to read parts of the tiled, pyramidal images in the TIFF file: >>> import zarr >>> store = imread('temp.ome.tif', aszarr=True) >>> z = zarr.open(store, mode='r') >>> z >>> z[0] # base layer >>> z[0][2, 0, 128:384, 256:].shape # read a tile from the base layer (256, 256, 3) >>> store.close() Load the base layer from the Zarr store as a dask array: >>> import dask.array >>> store = imread('temp.ome.tif', aszarr=True) >>> dask.array.from_zarr(store, 0) dask.array<...shape=(8, 2, 512, 512, 3)...chunksize=(1, 1, 128, 128, 3)... 
>>> store.close() Write the Zarr store to a fsspec ReferenceFileSystem in JSON format: >>> store = imread('temp.ome.tif', aszarr=True) >>> store.write_fsspec('temp.ome.tif.json', url='file://') >>> store.close() Open the fsspec ReferenceFileSystem as a Zarr group: >>> import fsspec >>> import imagecodecs.numcodecs >>> imagecodecs.numcodecs.register_codecs() >>> mapper = fsspec.get_mapper( ... 'reference://', fo='temp.ome.tif.json', target_protocol='file' ... ) >>> z = zarr.open(mapper, mode='r') >>> z Create an OME-TIFF file containing an empty, tiled image series and write to it via the Zarr interface (note: this does not work with compression): >>> imwrite( ... 'temp.ome.tif', ... shape=(8, 800, 600), ... dtype='uint16', ... photometric='minisblack', ... tile=(128, 128), ... metadata={'axes': 'CYX'}, ... ) >>> store = imread('temp.ome.tif', mode='r+', aszarr=True) >>> z = zarr.open(store, mode='r+') >>> z >>> z[3, 100:200, 200:300:2] = 1024 >>> store.close() Read images from a sequence of TIFF files as NumPy array using two I/O worker threads: >>> imwrite('temp_C001T001.tif', numpy.random.rand(64, 64)) >>> imwrite('temp_C001T002.tif', numpy.random.rand(64, 64)) >>> image_sequence = imread( ... ['temp_C001T001.tif', 'temp_C001T002.tif'], ioworkers=2, maxworkers=1 ... 
) >>> image_sequence.shape (2, 64, 64) >>> image_sequence.dtype dtype('float64') Read an image stack from a series of TIFF files with a file name pattern as NumPy or Zarr arrays: >>> image_sequence = TiffSequence('temp_C0*.tif', pattern=r'_(C)(\d+)(T)(\d+)') >>> image_sequence.shape (1, 2) >>> image_sequence.axes 'CT' >>> data = image_sequence.asarray() >>> data.shape (1, 2, 64, 64) >>> store = image_sequence.aszarr() >>> zarr.open(store, mode='r') >>> image_sequence.close() Write the Zarr store to a fsspec ReferenceFileSystem in JSON format: >>> store = image_sequence.aszarr() >>> store.write_fsspec('temp.json', url='file://') Open the fsspec ReferenceFileSystem as a Zarr array: >>> import fsspec >>> import tifffile.numcodecs >>> tifffile.numcodecs.register_codec() >>> mapper = fsspec.get_mapper( ... 'reference://', fo='temp.json', target_protocol='file' ... ) >>> zarr.open(mapper, mode='r') Inspect the TIFF file from the command line:: $ python -m tifffile temp.ome.tif """ from __future__ import annotations __version__ = '2024.5.22' __all__ = [ 'TiffFile', 'TiffFileError', 'TiffFrame', 'TiffPage', 'TiffPages', 'TiffPageSeries', 'TiffReader', 'TiffSequence', 'TiffTag', 'TiffTags', 'TiffTagRegistry', 'TiffWriter', 'TiffFormat', 'ZarrFileSequenceStore', 'ZarrStore', 'ZarrTiffStore', 'imread', 'imshow', 'imwrite', 'lsm2bin', 'memmap', 'read_ndtiff_index', 'read_gdal_structural_metadata', 'read_micromanager_metadata', 'read_scanimage_metadata', 'tiff2fsspec', 'tiffcomment', 'TIFF', 'DATATYPE', 'CHUNKMODE', 'COMPRESSION', 'EXTRASAMPLE', 'FILETYPE', 'FILLORDER', 'OFILETYPE', 'ORIENTATION', 'PHOTOMETRIC', 'PLANARCONFIG', 'PREDICTOR', 'RESUNIT', 'SAMPLEFORMAT', 'OmeXml', 'OmeXmlError', 'FileCache', 'FileHandle', 'FileSequence', 'StoredShape', 'TiledSequence', 'NullContext', 'Timer', 'askopenfilename', 'astype', 'create_output', 'enumarg', 'enumstr', 'format_size', 'hexdump', 'imagej_description', 'imagej_metadata_tag', 'logger', 'matlabstr2py', 'natural_sorted', 
'nullfunc', 'parse_filenames', 'parse_kwargs', 'pformat', 'product', 'repeat_nd', 'reshape_axes', 'reshape_nd', 'stripnull', 'strptime', 'transpose_axes', 'update_kwargs', 'validate_jhove', 'xml2dict', '_TIFF', # private # deprecated 'lazyattr', 'imsave', 'squeeze_axes', ] import binascii import collections import datetime import enum import glob import io import json import logging import math import os import re import struct import sys import threading import time import warnings from concurrent.futures import ThreadPoolExecutor from functools import cached_property import numpy try: import imagecodecs except ImportError: # load pure Python implementation of some codecs try: from . import _imagecodecs as imagecodecs # type: ignore except ImportError: import _imagecodecs as imagecodecs # type: ignore from typing import TYPE_CHECKING, BinaryIO, cast, final, overload if TYPE_CHECKING: from collections.abc import ( Callable, Collection, Container, ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView, ) from typing import Any, Literal, Optional, TextIO, Union from numpy.typing import ArrayLike, DTypeLike, NDArray ByteOrder = Literal['>', '<'] OutputType = Union[str, BinaryIO, NDArray[Any], None] TagTuple = tuple[ Union[int, str], Union[int, str], Optional[int], Any, bool ] @overload def imread( files: ( str | os.PathLike[Any] | FileHandle | BinaryIO | Sequence[str | os.PathLike[Any]] | None ) = None, *, selection: Any | None = None, # TODO: type this aszarr: Literal[False] = ..., key: int | slice | Iterable[int] | None = None, series: int | None = None, level: int | None = None, squeeze: bool | None = None, maxworkers: int | None = None, buffersize: int | None = None, mode: Literal['r', 'r+'] | None = None, name: str | None = None, offset: int | None = None, size: int | None = None, pattern: str | None = None, axesorder: Sequence[int] | None = None, categories: dict[str, dict[str, int]] | None = None, imread: Callable[..., NDArray[Any]] | None = 
None, sort: Callable[..., Any] | bool | None = None, container: str | os.PathLike[Any] | None = None, chunkshape: tuple[int, ...] | None = None, dtype: DTypeLike | None = None, axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None, ioworkers: int | None = 1, chunkmode: CHUNKMODE | int | str | None = None, fillvalue: int | float | None = None, zattrs: dict[str, Any] | None = None, multiscales: bool | None = None, omexml: str | None = None, out: OutputType = None, out_inplace: bool | None = None, _multifile: bool | None = None, _useframes: bool | None = None, **kwargs: Any, ) -> NDArray[Any]: ... @overload def imread( files: ( str | os.PathLike[Any] | FileHandle | BinaryIO | Sequence[str | os.PathLike[Any]] | None ) = None, *, selection: Any | None = None, # TODO: type this aszarr: Literal[True], key: int | slice | Iterable[int] | None = None, series: int | None = None, level: int | None = None, squeeze: bool | None = None, maxworkers: int | None = None, buffersize: int | None = None, mode: Literal['r', 'r+'] | None = None, name: str | None = None, offset: int | None = None, size: int | None = None, pattern: str | None = None, axesorder: Sequence[int] | None = None, categories: dict[str, dict[str, int]] | None = None, imread: Callable[..., NDArray[Any]] | None = None, imreadargs: dict[str, Any] | None = None, sort: Callable[..., Any] | bool | None = None, container: str | os.PathLike[Any] | None = None, chunkshape: tuple[int, ...] 
| None = None, chunkdtype: DTypeLike | None = None, dtype: DTypeLike | None = None, # deprecated axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None, ioworkers: int | None = 1, chunkmode: CHUNKMODE | int | str | None = None, fillvalue: int | float | None = None, zattrs: dict[str, Any] | None = None, multiscales: bool | None = None, omexml: str | None = None, out: OutputType = None, out_inplace: bool | None = None, _multifile: bool | None = None, _useframes: bool | None = None, **kwargs: Any, ) -> ZarrTiffStore | ZarrFileSequenceStore: ... @overload def imread( files: ( str | os.PathLike[Any] | FileHandle | BinaryIO | Sequence[str | os.PathLike[Any]] | None ) = None, *, selection: Any | None = None, # TODO: type this aszarr: bool = False, key: int | slice | Iterable[int] | None = None, series: int | None = None, level: int | None = None, squeeze: bool | None = None, maxworkers: int | None = None, buffersize: int | None = None, mode: Literal['r', 'r+'] | None = None, name: str | None = None, offset: int | None = None, size: int | None = None, pattern: str | None = None, axesorder: Sequence[int] | None = None, categories: dict[str, dict[str, int]] | None = None, imread: Callable[..., NDArray[Any]] | None = None, imreadargs: dict[str, Any] | None = None, sort: Callable[..., Any] | bool | None = None, container: str | os.PathLike[Any] | None = None, chunkshape: tuple[int, ...] | None = None, chunkdtype: DTypeLike | None = None, dtype: DTypeLike | None = None, # deprecated axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None, ioworkers: int | None = 1, chunkmode: CHUNKMODE | int | str | None = None, fillvalue: int | float | None = None, zattrs: dict[str, Any] | None = None, multiscales: bool | None = None, omexml: str | None = None, out: OutputType = None, out_inplace: bool | None = None, _multifile: bool | None = None, _useframes: bool | None = None, **kwargs: Any, ) -> NDArray[Any] | ZarrTiffStore | ZarrFileSequenceStore: ... 
def imread(
    files: (
        str
        | os.PathLike[Any]
        | FileHandle
        | BinaryIO
        | Sequence[str | os.PathLike[Any]]
        | None
    ) = None,
    *,
    selection: Any | None = None,  # TODO: type this
    aszarr: bool = False,
    key: int | slice | Iterable[int] | None = None,
    series: int | None = None,
    level: int | None = None,
    squeeze: bool | None = None,
    maxworkers: int | None = None,
    buffersize: int | None = None,
    mode: Literal['r', 'r+'] | None = None,
    name: str | None = None,
    offset: int | None = None,
    size: int | None = None,
    pattern: str | None = None,
    axesorder: Sequence[int] | None = None,
    categories: dict[str, dict[str, int]] | None = None,
    imread: Callable[..., NDArray[Any]] | None = None,
    imreadargs: dict[str, Any] | None = None,
    sort: Callable[..., Any] | bool | None = None,
    container: str | os.PathLike[Any] | None = None,
    chunkshape: tuple[int, ...] | None = None,
    chunkdtype: DTypeLike | None = None,
    dtype: DTypeLike | None = None,  # deprecated
    axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
    ioworkers: int | None = 1,
    chunkmode: CHUNKMODE | int | str | None = None,
    fillvalue: int | float | None = None,
    zattrs: dict[str, Any] | None = None,
    multiscales: bool | None = None,
    omexml: str | None = None,
    out: OutputType = None,
    out_inplace: bool | None = None,
    _multifile: bool | None = None,
    _useframes: bool | None = None,
    **kwargs: Any,
) -> NDArray[Any] | ZarrTiffStore | ZarrFileSequenceStore:
    """Return image from TIFF file(s) as NumPy array or Zarr store.

    The first image series in the file(s) is returned by default.

    A single file (or a sequence holding exactly one file) is opened with
    :py:class:`TiffFile`; anything else, or any call specifying `container`,
    is handled as a file sequence.

    Parameters:
        files:
            File name, seekable binary stream, glob pattern, or sequence of
            file names. May be *None* if `container` is specified.
        selection:
            Subset of image to be extracted.
            If not None, a Zarr array is created, indexed with the `selection`
            value, and returned as a NumPy array. Only segments that are part
            of the selection will be read from file.
            Refer to the Zarr documentation for valid selections.
            Depending on selection size, image size, and storage properties,
            it may be more efficient to read the whole image from file and
            then index it.
        aszarr:
            Return file sequences, series, or single pages as Zarr store
            instead of NumPy array if `selection` is None.
        mode, name, offset, size, omexml, _multifile, _useframes:
            Passed to :py:class:`TiffFile`.
        key, series, level, squeeze, maxworkers, buffersize:
            Passed to :py:meth:`TiffFile.asarray`
            or :py:meth:`TiffFile.aszarr`.
        imread, container, sort, pattern, axesorder, axestiled, categories,\
        ioworkers:
            Passed to :py:class:`FileSequence`.
        chunkmode, fillvalue, zattrs, multiscales:
            Passed to :py:class:`ZarrTiffStore`
            or :py:class:`ZarrFileSequenceStore`.
        chunkshape, chunkdtype:
            Passed to :py:meth:`FileSequence.asarray` or
            :py:class:`ZarrFileSequenceStore`.
        dtype:
            Deprecated alias of `chunkdtype`. A DeprecationWarning is issued
            and the value replaces `chunkdtype` if specified.
        out_inplace:
            Passed to :py:meth:`FileSequence.asarray`
        out:
            Passed to :py:meth:`TiffFile.asarray`,
            :py:meth:`FileSequence.asarray`, or :py:func:`zarr_selection`.
        imreadargs:
            Additional arguments passed to :py:attr:`FileSequence.imread`.
        **kwargs:
            Additional arguments passed to :py:class:`TiffFile` or
            :py:attr:`FileSequence.imread`.

    Returns:
        Images from specified files, series, or pages.
        Zarr store instances must be closed after use.
        See :py:meth:`TiffPage.asarray` for operations that are applied
        (or not) to the image data stored in the file.

    Raises:
        TypeError:
            Unexpected keyword arguments were passed and no custom `imread`
            function is specified.
        ValueError:
            No files were specified or matched and `container` is None.

    """
    store: ZarrStore

    # a selection is always served through a Zarr store
    aszarr = aszarr or (selection is not None)
    # split the 'is_*' format flags off kwargs
    is_flags = parse_kwargs(kwargs, *(k for k in kwargs if k[:3] == 'is_'))

    if dtype is not None:
        warnings.warn(
            '<tifffile.imread> the dtype argument is '
            'deprecated since 2024.2.12. Use chunkdtype',
            DeprecationWarning,
            stacklevel=2,
        )
        chunkdtype = dtype
    del dtype

    # remaining kwargs are only meaningful for a custom imread function
    if imread is None and kwargs:
        raise TypeError(
            'imread() got unexpected keyword arguments '
            + ', '.join(f"'{key}'" for key in kwargs)
        )

    if container is None:
        if isinstance(files, str) and ('*' in files or '?' in files):
            files = glob.glob(files)
        if not files:
            raise ValueError('no files found')

        # unwrap a sequence containing a single file
        if (
            isinstance(files, collections.abc.Sequence)
            and not isinstance(files, str)
            and len(files) == 1
        ):
            files = files[0]

        if isinstance(files, str) or not isinstance(
            files, collections.abc.Sequence
        ):
            # single file name or file-like object
            with TiffFile(
                files,
                mode=mode,
                name=name,
                offset=offset,
                size=size,
                omexml=omexml,
                _multifile=_multifile,
                _useframes=_useframes,
                **is_flags,
            ) as tif:
                if aszarr:
                    assert key is None or isinstance(key, int)
                    store = tif.aszarr(
                        key=key,
                        series=series,
                        level=level,
                        squeeze=squeeze,
                        maxworkers=maxworkers,
                        buffersize=buffersize,
                        chunkmode=chunkmode,
                        fillvalue=fillvalue,
                        zattrs=zattrs,
                        multiscales=multiscales,
                    )
                    if selection is None:
                        return store
                    return zarr_selection(store, selection, out=out)
                return tif.asarray(
                    key=key,
                    series=series,
                    level=level,
                    squeeze=squeeze,
                    maxworkers=maxworkers,
                    buffersize=buffersize,
                    out=out,
                )

        elif isinstance(files, (FileHandle, BinaryIO)):
            # only evaluated for non-string sequences of files
            raise ValueError('BinaryIO not supported')

    # multiple files or a container: read as file sequence
    imread_kwargs = kwargs_notnone(
        key=key,
        series=series,
        level=level,
        squeeze=squeeze,
        maxworkers=maxworkers,
        buffersize=buffersize,
        imreadargs=imreadargs,
        _multifile=_multifile,
        _useframes=_useframes,
        **is_flags,
        **kwargs,
    )

    with TiffSequence(
        files,
        pattern=pattern,
        axesorder=axesorder,
        categories=categories,
        container=container,
        sort=sort,
        **kwargs_notnone(imread=imread),
    ) as imseq:
        if aszarr:
            store = imseq.aszarr(
                axestiled=axestiled,
                chunkmode=chunkmode,
                chunkshape=chunkshape,
                chunkdtype=chunkdtype,
                fillvalue=fillvalue,
                zattrs=zattrs,
                **imread_kwargs,
            )
            if selection is None:
                return store
            return zarr_selection(store, selection, out=out)
        return imseq.asarray(
            axestiled=axestiled,
            chunkshape=chunkshape,
            chunkdtype=chunkdtype,
            ioworkers=ioworkers,
            out=out,
            out_inplace=out_inplace,
            **imread_kwargs,
        )


def imwrite(
    file: str | os.PathLike[Any] | FileHandle | BinaryIO,
    /,
    data: (
        ArrayLike | Iterator[NDArray[Any] | None] | Iterator[bytes] | None
    ) = None,
    *,
    mode: Literal['w', 'x', 'r+'] | None = None,
    bigtiff: bool | None = None,
    byteorder: ByteOrder | None = None,
    imagej: bool = False,
    ome: bool | None = None,
    shaped: bool | None = None,
    append: bool = False,
    shape: Sequence[int] | None = None,
    dtype: DTypeLike | None = None,
    photometric: PHOTOMETRIC | int | str | None = None,
    planarconfig: PLANARCONFIG | int | str | None = None,
    extrasamples: Sequence[EXTRASAMPLE | int | str] | None = None,
    volumetric: bool = False,
    tile: Sequence[int] | None = None,
    rowsperstrip: int | None = None,
    bitspersample: int | None = None,
    compression: COMPRESSION | int | str | None = None,
    compressionargs: dict[str, Any] | None = None,
    predictor: PREDICTOR | int | str | bool | None = None,
    subsampling: tuple[int, int] | None = None,
    jpegtables: bytes | None = None,
    iccprofile: bytes | None = None,
    colormap: ArrayLike | None = None,
    description: str | bytes | None = None,
    datetime: str | bool | None = None,
    resolution: (
        tuple[float | tuple[int, int], float | tuple[int, int]] | None
    ) = None,
    resolutionunit: RESUNIT | int | str | None = None,
    subfiletype: FILETYPE | int | None = None,
    software: str | bytes | bool | None = None,
    # subifds: int | Sequence[int] | None = None,
    metadata: dict[str, Any] | None = {},
    extratags: Sequence[TagTuple] | None = None,
    contiguous: bool = False,
    truncate: bool = False,
    align: int | None = None,
    maxworkers: int | None = None,
    buffersize: int | None = None,
    returnoffset: bool = False,
) -> tuple[int, int] | None:
    """Write NumPy array to TIFF file.

    A BigTIFF file is written if the data size is larger than 4 GB less
    32 MB for metadata, and `bigtiff` is not *False*, and `imagej`,
    `truncate` and `compression` are not enabled.
    Unless `byteorder` is specified, the TIFF file byte order is determined
    from the dtype of `data` or the `dtype` argument.

    Parameters:
        file:
            Passed to :py:class:`TiffWriter`.
        data, shape, dtype:
            Passed to :py:meth:`TiffWriter.write`.
            If `data` is *None*, `shape` and `dtype` are required.
        mode, append, byteorder, bigtiff, imagej, ome, shaped:
            Passed to :py:class:`TiffWriter`.
        photometric, planarconfig, extrasamples, volumetric, tile,\
        rowsperstrip, bitspersample, compression, compressionargs, predictor,\
        subsampling, jpegtables, iccprofile, colormap, description, datetime,\
        resolution, resolutionunit, subfiletype, software,\
        metadata, extratags, maxworkers, buffersize, \
        contiguous, truncate, align:
            Passed to :py:meth:`TiffWriter.write`.
        returnoffset:
            Return offset and number of bytes of memory-mappable image data
            in file.

    Returns:
        If `returnoffset` is *True* and the image data in the file are
        memory-mappable, the offset and number of bytes of the image
        data in the file.

    Raises:
        ValueError:
            `data` is None and `shape` or `dtype` are not specified.

    """
    # NOTE: the `metadata={}` default is forwarded unchanged and not mutated
    # in this function; an empty dict and None are distinct values for
    # TiffWriter.write.
    if data is None:
        # write empty file
        if shape is None or dtype is None:
            raise ValueError("missing required 'shape' or 'dtype' argument")
        dtype = numpy.dtype(dtype)
        shape = tuple(shape)
        datasize = product(shape) * dtype.itemsize
        if byteorder is None:
            byteorder = dtype.byteorder  # type: ignore
    else:
        # best effort: data may be a list, iterator, or other object
        # without nbytes/dtype attributes; size is then treated as unknown
        try:
            datasize = data.nbytes  # type: ignore
            if byteorder is None:
                byteorder = data.dtype.byteorder  # type: ignore
        except Exception:
            datasize = 0

    if bigtiff is None:
        # tuple (not set) membership: compares by equality only, so
        # unhashable compression values such as lists do not raise TypeError
        bigtiff = (
            datasize > 2**32 - 2**25
            and not imagej
            and not truncate
            and compression in (None, 0, 1, 'NONE', 'none')
        )

    with TiffWriter(
        file,
        mode=mode,
        bigtiff=bigtiff,
        byteorder=byteorder,
        append=append,
        imagej=imagej,
        ome=ome,
        shaped=shaped,
    ) as tif:
        result = tif.write(
            data,
            shape=shape,
            dtype=dtype,
            photometric=photometric,
            planarconfig=planarconfig,
            extrasamples=extrasamples,
            volumetric=volumetric,
            tile=tile,
            rowsperstrip=rowsperstrip,
            bitspersample=bitspersample,
            compression=compression,
            compressionargs=compressionargs,
            predictor=predictor,
            subsampling=subsampling,
            jpegtables=jpegtables,
            iccprofile=iccprofile,
            colormap=colormap,
            description=description,
            datetime=datetime,
            resolution=resolution,
            resolutionunit=resolutionunit,
            subfiletype=subfiletype,
            software=software,
            metadata=metadata,
            extratags=extratags,
            contiguous=contiguous,
            truncate=truncate,
            align=align,
            maxworkers=maxworkers,
            buffersize=buffersize,
            returnoffset=returnoffset,
        )
    return result


def imsave(*args: Any, **kwargs: Any) -> None:
    """Deprecated. Use :py:func:`imwrite`.

    Issue a DeprecationWarning and forward all arguments to
    :py:func:`imwrite`. The return value of `imwrite` is discarded.

    :meta private:

    """
    warnings.warn(
        '<tifffile.imsave> is deprecated. Use tifffile.imwrite',
        DeprecationWarning,
        stacklevel=2,
    )
    imwrite(*args, **kwargs)


def memmap(
    filename: str | os.PathLike[Any],
    /,
    *,
    shape: Sequence[int] | None = None,
    dtype: numpy.dtype[Any] | None = None,
    page: int | None = None,
    series: int = 0,
    level: int = 0,
    mode: Literal['r+', 'r', 'c'] = 'r+',
    **kwargs: Any,
) -> numpy.memmap:
    """Return memory-mapped NumPy array of image data stored in TIFF file.

    Memory-mapping requires the image data stored in native byte order,
    without tiling, compression, predictors, etc.
    If `shape` and `dtype` are provided, existing files are overwritten or
    appended to depending on the `append` argument.
    Else, the image data of a specified page or series in an existing
    file are memory-mapped. By default, the image data of the first
    series are memory-mapped.
    Call `flush` to write any changes in the array to the file.

    Parameters:
        filename:
            Name of TIFF file which stores array.
        shape:
            Shape of empty array.
        dtype:
            Datatype of empty array.
        page:
            Index of page which image data to memory-map.
        series:
            Index of page series which image data to memory-map.
        level:
            Index of pyramid level which image data to memory-map.
        mode:
            Memory-map file open mode. The default is 'r+', which opens
            existing file for reading and writing.
        **kwargs:
            Additional arguments passed to :py:func:`imwrite` or
            :py:class:`TiffFile`.

    Returns:
        Image in TIFF file as memory-mapped NumPy array.

    Raises:
        ValueError:
            Image data in TIFF file are not memory-mappable.

    """
    filename = os.fspath(filename)
    if shape is not None:
        shape = tuple(shape)
    if shape is not None and dtype is not None:
        # create a new, empty array
        kwargs.update(
            data=None,
            shape=shape,
            dtype=dtype,
            align=TIFF.ALLOCATIONGRANULARITY,
            returnoffset=True,
        )
        result = imwrite(filename, **kwargs)
        if result is None:
            # TODO: fail before creating file or writing data
            raise ValueError('image data are not memory-mappable')
        offset = result[0]
    else:
        # use existing file
        with TiffFile(filename, **kwargs) as tif:
            if page is None:
                tiffseries = tif.series[series].levels[level]
                if tiffseries.dataoffset is None:
                    raise ValueError('image data are not memory-mappable')
                shape = tiffseries.shape
                dtype = tiffseries.dtype
                offset = tiffseries.dataoffset
            else:
                tiffpage = tif.pages[page]
                if not tiffpage.is_memmappable:
                    raise ValueError('image data are not memory-mappable')
                offset = tiffpage.dataoffsets[0]
                shape = tiffpage.shape
                dtype = tiffpage.dtype
            assert dtype is not None
            # map the data using the byte order of the file
            dtype = numpy.dtype(tif.byteorder + dtype.char)
    return numpy.memmap(filename, dtype, mode, offset, shape, 'C')


class TiffFileError(Exception):
    """Exception to indicate invalid TIFF structure."""
By default, a classic 32-bit TIFF file is written, which is limited to 4 GB. If `append` is *True*, the existing file's format is used. byteorder: Endianness of TIFF format. One of '<', '>', '=', or '|'. The default is the system's native byte order. append: If `file` is existing standard TIFF file, append image data and tags to file. Parameters `bigtiff` and `byteorder` set from existing file. Appending does not scale well with the number of pages already in the file and may corrupt specifically formatted TIFF files such as OME-TIFF, LSM, STK, ImageJ, or FluoView. imagej: Write ImageJ hyperstack compatible file if `ome` is not enabled. This format can handle data types uint8, uint16, or float32 and data shapes up to 6 dimensions in TZCYXS order. RGB images (S=3 or S=4) must be `uint8`. ImageJ's default byte order is big-endian, but this implementation uses the system's native byte order by default. ImageJ hyperstacks do not support BigTIFF or compression. The ImageJ file format is undocumented. Use FIJI's Bio-Formats import function for compressed files. ome: Write OME-TIFF compatible file. By default, the OME-TIFF format is used if the file name extension contains '.ome.', `imagej` is not enabled, and the `description` argument in the first call of :py:meth:`TiffWriter.write` is not specified. The format supports multiple, up to 9 dimensional image series. The default axes order is TZC(S)YX(S). Refer to the OME model for restrictions of this format. shaped: Write tifffile "shaped" compatible file. The shape of multi-dimensional images is stored in JSON format in a ImageDescription tag of the first page of a series. This is the default format used by tifffile unless `imagej` or `ome` are enabled or ``metadata=None`` is passed to :py:meth:`TiffWriter.write`. Raises: ValueError: The TIFF file cannot be appended to. Use ``append='force'`` to force appending, which may result in a corrupted file. 
""" tiff: TiffFormat """Format of TIFF file being written.""" _fh: FileHandle _omexml: OmeXml | None _ome: bool | None # writing OME-TIFF format _imagej: bool # writing ImageJ format _tifffile: bool # writing Tifffile shaped format _truncate: bool _metadata: dict[str, Any] | None _colormap: NDArray[numpy.uint16] | None _tags: list[tuple[int, bytes, Any, bool]] | None _datashape: tuple[int, ...] | None # shape of data in consecutive pages _datadtype: numpy.dtype[Any] | None # data type _dataoffset: int | None # offset to data _databytecounts: list[int] | None # byte counts per plane _dataoffsetstag: int | None # strip or tile offset tag code _descriptiontag: TiffTag | None # TiffTag for updating comment _ifdoffset: int _subifds: int # number of subifds _subifdslevel: int # index of current subifd level _subifdsoffsets: list[int] # offsets to offsets to subifds _nextifdoffsets: list[int] # offsets to offset to next ifd _ifdindex: int # index of current ifd _storedshape: StoredShape | None # normalized shape in consecutive pages def __init__( self, file: str | os.PathLike[Any] | FileHandle | BinaryIO, /, *, mode: Literal['w', 'x', 'r+'] | None = None, bigtiff: bool = False, byteorder: ByteOrder | None = None, append: bool | str = False, imagej: bool = False, ome: bool | None = None, shaped: bool | None = None, ) -> None: if mode in {'r+', 'r+b'} or ( isinstance(file, FileHandle) and file._mode == 'r+b' ): mode = 'r+' append = True if append: # determine if file is an existing TIFF file that can be extended try: with FileHandle(file, mode='rb', size=0) as fh: pos = fh.tell() try: with TiffFile(fh) as tif: if append != 'force' and not tif.is_appendable: raise ValueError( 'cannot append to file containing metadata' ) byteorder = tif.byteorder bigtiff = tif.is_bigtiff self._ifdoffset = cast( int, tif.pages.next_page_offset ) finally: fh.seek(pos) append = True except (OSError, FileNotFoundError): append = False if append: if mode not in {None, 'r+', 'r+b'}: raise 
ValueError("append mode must be 'r+'") mode = 'r+' elif mode is None: mode = 'w' if byteorder in {None, '=', '|'}: byteorder = '<' if sys.byteorder == 'little' else '>' elif byteorder not in {'<', '>'}: raise ValueError(f'invalid byteorder {byteorder}') if byteorder == '<': self.tiff = TIFF.BIG_LE if bigtiff else TIFF.CLASSIC_LE else: self.tiff = TIFF.BIG_BE if bigtiff else TIFF.CLASSIC_BE self._truncate = False self._metadata = None self._colormap = None self._tags = None self._datashape = None self._datadtype = None self._dataoffset = None self._databytecounts = None self._dataoffsetstag = None self._descriptiontag = None self._subifds = 0 self._subifdslevel = -1 self._subifdsoffsets = [] self._nextifdoffsets = [] self._ifdindex = 0 self._omexml = None self._storedshape = None self._fh = FileHandle(file, mode=mode, size=0) if append: self._fh.seek(0, os.SEEK_END) else: self._fh.write({'<': b'II', '>': b'MM'}[byteorder]) if bigtiff: self._fh.write(struct.pack(byteorder + 'HHH', 43, 8, 0)) else: self._fh.write(struct.pack(byteorder + 'H', 42)) # first IFD self._ifdoffset = self._fh.tell() self._fh.write(struct.pack(self.tiff.offsetformat, 0)) self._ome = None if ome is None else bool(ome) self._imagej = False if self._ome else bool(imagej) if self._imagej: self._ome = False if self._ome or self._imagej: self._tifffile = False else: self._tifffile = True if shaped is None else bool(shaped) if imagej and bigtiff: warnings.warn( f'{self!r} writing nonconformant BigTIFF ImageJ', UserWarning ) def write( self, data: ( ArrayLike | Iterator[NDArray[Any] | None] | Iterator[bytes] | None ) = None, *, shape: Sequence[int] | None = None, dtype: DTypeLike | None = None, photometric: PHOTOMETRIC | int | str | None = None, planarconfig: PLANARCONFIG | int | str | None = None, extrasamples: Sequence[EXTRASAMPLE | int | str] | None = None, volumetric: bool = False, tile: Sequence[int] | None = None, rowsperstrip: int | None = None, bitspersample: int | None = None, compression: 
COMPRESSION | int | str | bool | None = None, compressionargs: dict[str, Any] | None = None, predictor: PREDICTOR | int | str | bool | None = None, subsampling: tuple[int, int] | None = None, jpegtables: bytes | None = None, iccprofile: bytes | None = None, colormap: ArrayLike | None = None, description: str | bytes | None = None, datetime: str | bool | None = None, resolution: ( tuple[float | tuple[int, int], float | tuple[int, int]] | None ) = None, resolutionunit: RESUNIT | int | str | None = None, subfiletype: FILETYPE | int | None = None, software: str | bytes | bool | None = None, subifds: int | Sequence[int] | None = None, metadata: dict[str, Any] | None = {}, extratags: Sequence[TagTuple] | None = None, contiguous: bool = False, truncate: bool = False, align: int | None = None, maxworkers: int | None = None, buffersize: int | None = None, returnoffset: bool = False, ) -> tuple[int, int] | None: r"""Write multi-dimensional image to series of TIFF pages. Metadata in JSON, ImageJ, or OME-XML format are written to the ImageDescription tag of the first page of a series by default, such that the image can later be read back as an array of the same shape. The values of the ImageWidth, ImageLength, ImageDepth, and SamplesPerPixel tags are inferred from the last dimensions of the data's shape. The value of the SampleFormat tag is inferred from the data's dtype. Image data are written uncompressed in one strip per plane by default. Dimensions higher than 2 to 4 (depending on photometric mode, planar configuration, and volumetric mode) are flattened and written as separate pages. If the data size is zero, write a single page with shape (0, 0). Parameters: data: Specifies image to write. If *None*, an empty image is written, which size and type must be specified using `shape` and `dtype` arguments. This option cannot be used with compression, predictors, packed integers, or bilevel images. 
A copy of array-like data is made if it is not a C-contiguous numpy or dask array with the same byteorder as the TIFF file. Iterators must yield ndarrays or bytes compatible with the file's byteorder as well as the `shape` and `dtype` arguments. Iterator bytes must be compatible with the `compression`, `predictor`, `subsampling`, and `jpegtables` arguments. If `tile` is specified, iterator items must match the tile shape. Incomplete tiles are zero-padded. Iterators of non-tiled images must yield ndarrays of `shape[1:]` or strips as bytes. Iterators of strip ndarrays are not supported. Writing dask arrays might be excruciatingly slow for arrays with many chunks or files with many segments. (https://github.com/dask/dask/issues/8570). shape: Shape of image to write. The default is inferred from the `data` argument if possible. A ValueError is raised if the value is incompatible with the `data` or other arguments. dtype: NumPy data type of image to write. The default is inferred from the `data` argument if possible. A ValueError is raised if the value is incompatible with the `data` argument. photometric: Color space of image. The default is inferred from the data shape, dtype, and the `colormap` argument. A UserWarning is logged if RGB color space is auto-detected. Specify this parameter to silence the warning and to avoid ambiguities. *MINISBLACK*: for bilevel and grayscale images, 0 is black. *MINISWHITE*: for bilevel and grayscale images, 0 is white. *RGB*: the image contains red, green and blue samples. *SEPARATED*: the image contains CMYK samples. *PALETTE*: the image is used as an index into a colormap. *CFA*: the image is a Color Filter Array. The CFARepeatPatternDim, CFAPattern, and other DNG or TIFF/EP tags must be specified in `extratags` to produce a valid file. The value is written to the PhotometricInterpretation tag. planarconfig: Specifies if samples are stored interleaved or in separate planes. *CONTIG*: the last dimension contains samples. 
*SEPARATE*: the 3rd or 4th last dimension contains samples. The default is inferred from the data shape and `photometric` mode. If this parameter is set, extra samples are used to store grayscale images. The value is written to the PlanarConfiguration tag. extrasamples: Interpretation of extra components in pixels. *UNSPECIFIED*: no transparency information (default). *ASSOCALPHA*: true transparency with premultiplied color. *UNASSALPHA*: independent transparency masks. The values are written to the ExtraSamples tag. volumetric: Write volumetric image to single page (instead of multiple pages) using SGI ImageDepth tag. The volumetric format is not part of the TIFF specification, and few software can read it. OME and ImageJ formats are not compatible with volumetric storage. tile: Shape ([depth,] length, width) of image tiles to write. By default, image data are written in strips. The tile length and width must be a multiple of 16. If a tile depth is provided, the SGI ImageDepth and TileDepth tags are used to write volumetric data. Tiles cannot be used to write contiguous series, except if the tile shape matches the data shape. The values are written to the TileWidth, TileLength, and TileDepth tags. rowsperstrip: Number of rows per strip. By default, strips are about 256 KB if `compression` is enabled, else rowsperstrip is set to the image length. The value is written to the RowsPerStrip tag. bitspersample: Number of bits per sample. The default is the number of bits of the data's dtype. Different values per samples are not supported. Unsigned integer data are packed into bytes as tightly as possible. Valid values are 1-8 for uint8, 9-16 for uint16, and 17-32 for uint32. This setting cannot be used with compression, contiguous series, or empty files. The value is written to the BitsPerSample tag. compression: Compression scheme used on image data. By default, image data are written uncompressed. Compression cannot be used to write contiguous series. 
Compressors may require certain data shapes, types or value ranges. For example, JPEG compression requires grayscale or RGB(A), uint8 or 12-bit uint16. JPEG compression is experimental. JPEG markers and TIFF tags may not match. Only a limited set of compression schemes are implemented. 'ZLIB' is short for ADOBE_DEFLATE. The value is written to the Compression tag. compressionargs: Extra arguments passed to compression codec, for example, compression level. Refer to the Imagecodecs implementation for supported arguments. predictor: Horizontal differencing operator applied to image data before compression. By default, no operator is applied. Predictors can only be used with certain compression schemes and data types. The value is written to the Predictor tag. subsampling: Horizontal and vertical subsampling factors used for the chrominance components of images: (1, 1), (2, 1), (2, 2), or (4, 1). The default is *(2, 2)*. Currently applies to JPEG compression of RGB images only. Images are stored in YCbCr color space, the value of the PhotometricInterpretation tag is *YCBCR*. Segment widths must be a multiple of 8 times the horizontal factor. Segment lengths and rowsperstrip must be a multiple of 8 times the vertical factor. The values are written to the YCbCrSubSampling tag. jpegtables: JPEG quantization and/or Huffman tables. Use for copying pre-compressed JPEG segments. The value is written to the JPEGTables tag. iccprofile: International Color Consortium (ICC) device profile characterizing image color space. The value is written verbatim to the InterColorProfile tag. colormap: RGB color values for corresponding data value. The colormap array must be of shape `(3, 2\*\*(data.itemsize*8))` and dtype uint16. The image's data type must be uint8 or uint16 and the values are indices into the last dimension of the colormap. The value is written to the ColorMap tag. description: Subject of image. Must be 7-bit ASCII. Cannot be used with the ImageJ or OME formats. 
The value is written to the ImageDescription tag of the first page of a series. datetime: Date and time of image creation in ``%Y:%m:%d %H:%M:%S`` format or datetime object. If *True*, the current date and time is used. The value is written to the DateTime tag of the first page of a series. resolution: Number of pixels per `resolutionunit` in X and Y directions as float or rational numbers. The default is (1.0, 1.0). The values are written to the YResolution and XResolution tags. resolutionunit: Unit of measurement for `resolution` values. The default is *NONE* if `resolution` is not specified and for ImageJ format, else *INCH*. The value is written to the ResolutionUnit tags. subfiletype: Bitfield to indicate kind of image. Set bit 0 if the image is a reduced-resolution version of another image. Set bit 1 if the image is part of a multi-page image. Set bit 2 if the image is transparency mask for another image (photometric must be MASK, SamplesPerPixel and bitspersample must be 1). software: Name of software used to create file. Must be 7-bit ASCII. The default is 'tifffile.py'. Unless *False*, the value is written to the Software tag of the first page of a series. subifds: Number of child IFDs. If greater than 0, the following `subifds` number of series are written as child IFDs of the current series. The number of IFDs written for each SubIFD level must match the number of IFDs written for the current series. All pages written to a certain SubIFD level of the current series must have the same hash. SubIFDs cannot be used with truncated or ImageJ files. SubIFDs in OME-TIFF files must be sub-resolutions of the main IFDs. metadata: Additional metadata describing image, written along with shape information in JSON, OME-XML, or ImageJ formats in ImageDescription or IJMetadata tags. If *None*, or the `shaped` argument to :py:class:`TiffWriter` is *False*, no information in JSON format is written to the ImageDescription tag. 
The 'axes' item defines the character codes for dimensions in `data` or `shape`. Refer to :py:class:`OmeXml` for supported keys when writing OME-TIFF. Refer to :py:func:`imagej_description` and :py:func:`imagej_metadata_tag` for items supported by the ImageJ format. Items 'Info', 'Labels', 'Ranges', 'LUTs', 'Plot', 'ROI', and 'Overlays' are written to the IJMetadata and IJMetadataByteCounts tags. Strings must be 7-bit ASCII. Written with the first page of a series only. extratags: Additional tags to write. A list of tuples with 5 items: 0. code (int): Tag Id. 1. dtype (:py:class:`DATATYPE`): Data type of items in `value`. 2. count (int): Number of data values. Not used for string or bytes values. 3. value (Sequence[Any]): `count` values compatible with `dtype`. Bytes must contain count values of dtype packed as binary data. 4. writeonce (bool): If *True*, write tag to first page of a series only. Duplicate and select tags in TIFF.TAG_FILTERED are not written if the extratag is specified by integer code. Extratags cannot be used to write IFD type tags. contiguous: If *False* (default), write data to a new series. If *True* and the data and arguments are compatible with previous written ones (same shape, no compression, etc.), the image data are stored contiguously after the previous one. In that case, `photometric`, `planarconfig`, and `rowsperstrip` are ignored. Metadata such as `description`, `metadata`, `datetime`, and `extratags` are written to the first page of a contiguous series only. Contiguous mode cannot be used with the OME or ImageJ formats. truncate: If *True*, only write first page of contiguous series if possible (uncompressed, contiguous, not tiled). Other TIFF readers will only be able to read part of the data. Cannot be used with the OME or ImageJ formats. align: Byte boundary on which to align image data in file. The default is 16. Use mmap.ALLOCATIONGRANULARITY for memory-mapped data. Following contiguous writes are not aligned. 
maxworkers: Maximum number of threads to concurrently compress tiles or strips. If *None* or *0*, use up to :py:attr:`_TIFF.MAXWORKERS` CPU cores for compressing large segments. Using multiple threads can significantly speed up this function if the bottleneck is encoding the data, for example, in case of large JPEG compressed tiles. If the bottleneck is I/O or pure Python code, using multiple threads might be detrimental. buffersize: Approximate number of bytes to compress in one pass. The default is :py:attr:`_TIFF.BUFFERSIZE` * 2. returnoffset: Return offset and number of bytes of memory-mappable image data in file. Returns: If `returnoffset` is *True* and the image data in the file are memory-mappable, return the offset and number of bytes of the image data in the file. """ # TODO: refactor this function fh: FileHandle storedshape: StoredShape = StoredShape(frames=-1) byteorder: Literal['>', '<'] inputshape: tuple[int, ...] datashape: tuple[int, ...] dataarray: NDArray[Any] | None = None dataiter: Iterator[NDArray[Any] | bytes | None] | None = None dataoffsetsoffset: tuple[int, int | None] | None = None databytecountsoffset: tuple[int, int | None] | None = None subifdsoffsets: tuple[int, int | None] | None = None datadtype: numpy.dtype[Any] bilevel: bool tiles: tuple[int, ...] 
ifdpos: int photometricsamples: int pos: int | None = None predictortag: int predictorfunc: Callable[..., Any] | None = None compressiontag: int compressionfunc: Callable[..., Any] | None = None tags: list[tuple[int, bytes, bytes | None, bool]] numtiles: int numstrips: int fh = self._fh byteorder = self.tiff.byteorder if data is None: # empty if shape is None or dtype is None: raise ValueError( "missing required 'shape' or 'dtype' arguments" ) dataarray = None dataiter = None datashape = tuple(shape) datadtype = numpy.dtype(dtype).newbyteorder(byteorder) elif hasattr(data, '__next__'): # iterator/generator if shape is None or dtype is None: raise ValueError( "missing required 'shape' or 'dtype' arguments" ) dataiter = data # type: ignore datashape = tuple(shape) datadtype = numpy.dtype(dtype).newbyteorder(byteorder) elif hasattr(data, 'dtype'): # numpy, zarr, or dask array data = cast(numpy.ndarray, data) # for MyPy dataarray = data datadtype = numpy.dtype(data.dtype).newbyteorder(byteorder) if not hasattr(data, 'reshape'): # zarr array cannot be shape-normalized dataarray = numpy.asarray(data, datadtype, 'C') else: try: # numpy array must be C contiguous if data.flags.f_contiguous: dataarray = numpy.asarray(data, datadtype, 'C') except AttributeError: # not a numpy array pass datashape = dataarray.shape dataiter = None if dtype is not None and numpy.dtype(dtype) != datadtype: raise ValueError( f'dtype argument {dtype!r} does not match ' f'data dtype {datadtype}' ) if shape is not None and shape != dataarray.shape: raise ValueError( f'shape argument {shape!r} does not match ' f'data shape {dataarray.shape}' ) else: # scalar, list, tuple, etc # if dtype is not specified, default to float64 datadtype = numpy.dtype(dtype).newbyteorder(byteorder) dataarray = numpy.asarray(data, datadtype, 'C') datashape = dataarray.shape dataiter = None del data if any(size >= 4294967296 for size in datashape): raise ValueError('invalid data shape') returnoffset = returnoffset and 
datadtype.isnative bilevel = datadtype.char == '?' if bilevel: index = -1 if datashape[-1] > 1 else -2 datasize = product(datashape[:index]) if datashape[index] % 8: datasize *= datashape[index] // 8 + 1 else: datasize *= datashape[index] // 8 else: datasize = product(datashape) * datadtype.itemsize if datasize == 0: dataarray = None compression = False bitspersample = None if metadata is not None: truncate = True if ( not compression or ( not isinstance(compression, bool) # because True == 1 and compression in ('NONE', 'none', 1) ) or ( isinstance(compression, (tuple, list)) and compression[0] in (None, 0, 1, 'NONE', 'none') ) ): compression = False if not predictor or ( not isinstance(predictor, bool) # because True == 1 and predictor in {'NONE', 'none', 1} ): predictor = False inputshape = datashape packints = ( bitspersample is not None and bitspersample != datadtype.itemsize * 8 ) # just append contiguous data if possible if self._datashape is not None and self._datadtype is not None: if ( not contiguous or self._datashape[1:] != datashape or self._datadtype != datadtype or (colormap is None and self._colormap is not None) or (self._colormap is None and colormap is not None) or not ( numpy.array_equal(colormap, self._colormap) # type: ignore ) ): # incompatible shape, dtype, or colormap self._write_remaining_pages() if self._imagej: raise ValueError( 'the ImageJ format does not support ' 'non-contiguous series' ) if self._omexml is not None: if self._subifdslevel < 0: # add image to OME-XML assert self._storedshape is not None assert self._metadata is not None self._omexml.addimage( dtype=self._datadtype, shape=self._datashape[ 0 if self._datashape[0] != 1 else 1 : ], storedshape=self._storedshape.shape, **self._metadata, ) elif metadata is not None: self._write_image_description() # description might have been appended to file fh.seek(0, os.SEEK_END) if self._subifds: if self._truncate or truncate: raise ValueError( 'SubIFDs cannot be used with truncated 
series' ) self._subifdslevel += 1 if self._subifdslevel == self._subifds: # done with writing SubIFDs self._nextifdoffsets = [] self._subifdsoffsets = [] self._subifdslevel = -1 self._subifds = 0 self._ifdindex = 0 elif subifds: raise ValueError( 'SubIFDs in SubIFDs are not supported' ) self._datashape = None self._colormap = None elif compression or packints or tile: raise ValueError( 'contiguous mode cannot be used with compression or tiles' ) else: # consecutive mode # write all data, write IFDs/tags later self._datashape = (self._datashape[0] + 1,) + datashape offset = fh.tell() if dataarray is None: fh.write_empty(datasize) else: fh.write_array(dataarray, datadtype) if returnoffset: return offset, datasize return None if self._ome is None: if description is None: self._ome = '.ome.' in fh.extension else: self._ome = False if self._tifffile or self._imagej: self._truncate = bool(truncate) elif truncate: raise ValueError( 'truncate can only be used with imagej or shaped formats' ) else: self._truncate = False if self._truncate and (compression or packints or tile): raise ValueError( 'truncate cannot be used with compression, packints, or tiles' ) if datasize == 0: # write single placeholder TiffPage for arrays with size=0 datashape = (0, 0) warnings.warn( f'{self!r} writing zero-size array to nonconformant TIFF', UserWarning, ) # TODO: reconsider this # raise ValueError('cannot save zero size array') tagnoformat = self.tiff.tagnoformat offsetformat = self.tiff.offsetformat offsetsize = self.tiff.offsetsize tagsize = self.tiff.tagsize MINISBLACK = PHOTOMETRIC.MINISBLACK MINISWHITE = PHOTOMETRIC.MINISWHITE RGB = PHOTOMETRIC.RGB YCBCR = PHOTOMETRIC.YCBCR PALETTE = PHOTOMETRIC.PALETTE CONTIG = PLANARCONFIG.CONTIG SEPARATE = PLANARCONFIG.SEPARATE # parse input if photometric is not None: photometric = enumarg(PHOTOMETRIC, photometric) if planarconfig: planarconfig = enumarg(PLANARCONFIG, planarconfig) if extrasamples is not None: # TODO: deprecate non-sequence 
extrasamples extrasamples = tuple( int(enumarg(EXTRASAMPLE, x)) for x in sequence(extrasamples) ) if compressionargs is None: compressionargs = {} if compression: if isinstance(compression, (tuple, list)): warnings.warn( f"{self!r} passing multiple values to the 'compression' " "parameter is deprecated since 2022.7.28. " "Use 'compressionargs' to pass extra arguments to the " "compression codec.", DeprecationWarning, stacklevel=2, ) if len(compression) == 2: compressionargs['level'] = compression[1] elif len(compression) == 3: compressionargs = dict(compression[2]) if compression[1] is not None: compressionargs['level'] = compression[1] else: raise ValueError('invalid compression') compression = compression[0] if isinstance(compression, str): compression = compression.upper() if compression == 'ZLIB': compression = 8 # ADOBE_DEFLATE elif isinstance(compression, bool): compression = 8 # ADOBE_DEFLATE compressiontag = enumarg(COMPRESSION, compression).value compression = True else: compressiontag = 1 compression = False if compressiontag == 1: compressionargs = {} elif compressiontag in {33003, 33004, 33005, 34712}: # JPEG2000: use J2K instead of JP2 compressionargs['codecformat'] = 0 # OPJ_CODEC_J2K if predictor: if not compression: raise ValueError('cannot use predictor without compression') if compressiontag in TIFF.IMAGE_COMPRESSIONS: # don't use predictor with JPEG, JPEG2000, WEBP, PNG, ... 
raise ValueError( 'cannot use predictor with ' f'{COMPRESSION(compressiontag)!r}' ) if isinstance(predictor, bool): if datadtype.kind == 'f': predictortag = 3 elif datadtype.kind in 'iu' and datadtype.itemsize <= 4: predictortag = 2 else: raise ValueError( f'cannot use predictor with {datadtype!r}' ) else: predictor = enumarg(PREDICTOR, predictor) if ( datadtype.kind in 'iu' and predictor.value not in {2, 34892, 34893} and datadtype.itemsize <= 4 ) or ( datadtype.kind == 'f' and predictor.value not in {3, 34894, 34895} ): raise ValueError( f'cannot use {predictor!r} with {datadtype!r}' ) predictortag = predictor.value else: predictortag = 1 del predictor predictorfunc = TIFF.PREDICTORS[predictortag] if self._ome: if description is not None: warnings.warn( f'{self!r} not writing description to OME-TIFF', UserWarning, ) description = None if self._omexml is None: if metadata is None: self._omexml = OmeXml() else: self._omexml = OmeXml(**metadata) if volumetric or (tile and len(tile) > 2): raise ValueError('OME-TIFF does not support ImageDepth') volumetric = False elif self._imagej: # if tile is not None or predictor or compression: # warnings.warn( # f'{self!r} the ImageJ format does not support ' # 'tiles, predictors, compression' # ) if description is not None: warnings.warn( f'{self!r} not writing description to ImageJ file', UserWarning, ) description = None if datadtype.char not in 'BHhf': raise ValueError( 'the ImageJ format does not support data type ' f'{datadtype.char!r}' ) if volumetric or (tile and len(tile) > 2): raise ValueError( 'the ImageJ format does not support ImageDepth' ) volumetric = False ijrgb = photometric == RGB if photometric else None if datadtype.char != 'B': if photometric == RGB: raise ValueError( 'the ImageJ format does not support ' f'data type {datadtype!r} for RGB' ) ijrgb = False if metadata is None: axes = None else: axes = metadata.get('axes', None) ijshape = imagej_shape(datashape, rgb=ijrgb, axes=axes) if planarconfig == 
SEPARATE: raise ValueError( 'the ImageJ format does not support planar samples' ) if ijshape[-1] in {3, 4}: photometric = RGB elif photometric is None: photometric = MINISBLACK planarconfig = None planarconfig = CONTIG if ijrgb else None # verify colormap and indices if colormap is not None: colormap = numpy.asarray(colormap, dtype=byteorder + 'H') if datadtype.char in 'BH': if colormap.shape != (3, 2 ** (datadtype.itemsize * 8)): raise ValueError('invalid colormap shape') elif self._imagej: if colormap.shape != (3, 256): raise ValueError('invalid colormap shape') else: raise ValueError('invalid data dtype for palette mode') self._colormap = colormap if tile: # verify tile shape if ( not 1 < len(tile) < 4 or tile[-1] % 16 or tile[-2] % 16 or any(i < 1 for i in tile) ): raise ValueError(f'invalid tile shape {tile}') tile = tuple(int(i) for i in tile) if volumetric and len(tile) == 2: tile = (1,) + tile volumetric = len(tile) == 3 else: tile = () volumetric = bool(volumetric) # normalize data shape to 5D or 6D, depending on volume: # (pages, separate_samples, [depth,] length, width, contig_samples) shape = reshape_nd( datashape, TIFF.PHOTOMETRIC_SAMPLES.get(photometric, 2), # type: ignore ) ndim = len(shape) if volumetric and ndim < 3: volumetric = False if colormap is not None and datadtype.char in 'BH': photometric = PALETTE planarconfig = None if photometric is None: deprecate = False photometric = MINISBLACK if bilevel: photometric = MINISWHITE elif planarconfig == CONTIG: if ndim > 2 and shape[-1] in {3, 4}: photometric = RGB deprecate = datadtype.char not in 'BH' elif planarconfig == SEPARATE: if volumetric and ndim > 3 and shape[-4] in {3, 4}: photometric = RGB deprecate = True elif ndim > 2 and shape[-3] in {3, 4}: photometric = RGB deprecate = True elif ndim > 2 and shape[-1] in {3, 4}: photometric = RGB planarconfig = CONTIG deprecate = datadtype.char not in 'BH' elif self._imagej or self._ome: photometric = MINISBLACK planarconfig = None elif volumetric 
and ndim > 3 and shape[-4] in {3, 4}: photometric = RGB planarconfig = SEPARATE deprecate = True elif ndim > 2 and shape[-3] in {3, 4}: photometric = RGB planarconfig = SEPARATE deprecate = True if deprecate: if planarconfig == CONTIG: msg = 'contiguous samples', 'parameter is' else: msg = ( 'separate component planes', "and 'planarconfig' parameters are", ) warnings.warn( f" data with shape {datashape} " f"and dtype '{datadtype}' are stored as RGB with {msg[0]}." " Future versions will store such data as MINISBLACK in " "separate pages by default, unless the 'photometric' " f"{msg[1]} specified.", DeprecationWarning, stacklevel=2, ) del msg del deprecate del datashape assert photometric is not None photometricsamples = TIFF.PHOTOMETRIC_SAMPLES[photometric] if planarconfig and len(shape) <= (3 if volumetric else 2): # TODO: raise error? planarconfig = None if photometricsamples > 1: photometric = MINISBLACK if photometricsamples > 1: if len(shape) < 3: raise ValueError(f'not a {photometric!r} image') if len(shape) < 4: volumetric = False if planarconfig is None: if photometric == RGB: samples_set = {photometricsamples, 4} # allow common alpha else: samples_set = {photometricsamples} if shape[-1] in samples_set: planarconfig = CONTIG elif shape[-4 if volumetric else -3] in samples_set: planarconfig = SEPARATE elif shape[-1] > shape[-4 if volumetric else -3]: # TODO: deprecated this? 
planarconfig = SEPARATE else: planarconfig = CONTIG if planarconfig == CONTIG: storedshape.contig_samples = shape[-1] storedshape.width = shape[-2] storedshape.length = shape[-3] if volumetric: storedshape.depth = shape[-4] else: storedshape.width = shape[-1] storedshape.length = shape[-2] if volumetric: storedshape.depth = shape[-3] storedshape.separate_samples = shape[-4] else: storedshape.separate_samples = shape[-3] if storedshape.samples > photometricsamples: storedshape.extrasamples = ( storedshape.samples - photometricsamples ) elif photometric == PHOTOMETRIC.CFA: if len(shape) != 2: raise ValueError('invalid CFA image') volumetric = False planarconfig = None storedshape.width = shape[-1] storedshape.length = shape[-2] # if all(et[0] != 50706 for et in extratags): # raise ValueError('must specify DNG tags for CFA image') elif planarconfig and len(shape) > (3 if volumetric else 2): if planarconfig == CONTIG: if extrasamples is None or len(extrasamples) > 0: # use extrasamples storedshape.contig_samples = shape[-1] storedshape.width = shape[-2] storedshape.length = shape[-3] if volumetric: storedshape.depth = shape[-4] else: planarconfig = None storedshape.contig_samples = 1 storedshape.width = shape[-1] storedshape.length = shape[-2] if volumetric: storedshape.depth = shape[-3] else: storedshape.width = shape[-1] storedshape.length = shape[-2] if extrasamples is None or len(extrasamples) > 0: # use extrasamples if volumetric: storedshape.depth = shape[-3] storedshape.separate_samples = shape[-4] else: storedshape.separate_samples = shape[-3] else: planarconfig = None storedshape.separate_samples = 1 if volumetric: storedshape.depth = shape[-3] storedshape.extrasamples = storedshape.samples - 1 # TODO: do not squeeze data when writing OME or ImageJ files (breaking) # elif self._ome or self._imagej or metadata in (None, False): # planarconfig = None # if extrasamples is None: # if len(shape) < 3: # volumetric = False # if len(shape) < 3 or shape[-1] != 1: # 
storedshape = ( # (-1, 1) + shape[(-3 if volumetric else -2) :] + (1,) # ) # else: # storedshape = (-1, 1) +shape[(-4 if volumetric else -3):] # else: # assert len(shape) > 2 # if len(shape) < 4: # volumetric = False # storedshape = (-1, 1) + shape[(-4 if volumetric else -3) :] # samplesperpixel = storedshape[-1] # storedshape.extrasamples = samplesperpixel - 1 else: # shaped series planarconfig = None while len(shape) > 2 and shape[-1] == 1: shape = shape[:-1] # remove trailing 1s if len(shape) < 3: volumetric = False if not extrasamples: storedshape.width = shape[-1] storedshape.length = shape[-2] if volumetric: storedshape.depth = shape[-3] else: storedshape.contig_samples = shape[-1] storedshape.width = shape[-2] storedshape.length = shape[-3] if volumetric: storedshape.depth = shape[-4] storedshape.extrasamples = storedshape.samples - 1 if not volumetric and tile and len(tile) == 3 and tile[0] > 1: raise ValueError( f' cannot write {storedshape!r} ' f'using volumetric tiles {tile}' ) if subfiletype is not None and subfiletype & 0b100: # FILETYPE_MASK if not ( bilevel and storedshape.samples == 1 and photometric in {0, 1, 4} ): raise ValueError('invalid SubfileType MASK') photometric = PHOTOMETRIC.MASK packints = False if bilevel: if bitspersample is not None and bitspersample != 1: raise ValueError(f'{bitspersample=} must be 1 for bilevel') bitspersample = 1 elif compressiontag == 7 and datadtype == 'uint16': if bitspersample is not None and bitspersample != 12: raise ValueError( f'{bitspersample=} must be 12 for JPEG compressed uint16' ) bitspersample = 12 # use 12-bit JPEG compression elif bitspersample is None: bitspersample = datadtype.itemsize * 8 elif ( datadtype.kind != 'u' or datadtype.itemsize > 4 ) and bitspersample != datadtype.itemsize * 8: raise ValueError(f'{bitspersample=} does not match {datadtype=}') elif not ( bitspersample > {1: 0, 2: 8, 4: 16}[datadtype.itemsize] and bitspersample <= datadtype.itemsize * 8 ): raise 
ValueError(f'{bitspersample=} out of range of {datadtype=}') elif compression: if bitspersample != datadtype.itemsize * 8: raise ValueError( f'{bitspersample=} cannot be used with compression' ) elif bitspersample != datadtype.itemsize * 8: packints = True if storedshape.frames == -1: s0 = storedshape.page_size storedshape.frames = 1 if s0 == 0 else product(inputshape) // s0 if datasize > 0 and not storedshape.is_valid: raise RuntimeError(f'invalid {storedshape!r}') if photometric == PALETTE: if storedshape.samples != 1 or storedshape.extrasamples > 0: raise ValueError(f'invalid {storedshape!r} for palette mode') elif storedshape.samples < photometricsamples: raise ValueError( f'not enough samples for {photometric!r}: ' f'expected {photometricsamples}, got {storedshape.samples}' ) if ( planarconfig is not None and storedshape.planarconfig != planarconfig ): raise ValueError( f'{planarconfig!r} does not match {storedshape!r}' ) del planarconfig if dataarray is not None: dataarray = dataarray.reshape(storedshape.shape) tags = [] # list of (code, ifdentry, ifdvalue, writeonce) if tile: tagbytecounts = 325 # TileByteCounts tagoffsets = 324 # TileOffsets else: tagbytecounts = 279 # StripByteCounts tagoffsets = 273 # StripOffsets self._dataoffsetstag = tagoffsets pack = self._pack addtag = self._addtag if extratags is None: extratags = () if description is not None: # ImageDescription: user provided description addtag(tags, 270, 2, 0, description, True) # write shape and metadata to ImageDescription self._metadata = {} if not metadata else metadata.copy() if self._omexml is not None: if len(self._omexml.images) == 0: # rewritten later at end of file description = '\x00\x00\x00\x00' else: description = None elif self._imagej: ijmetadata = parse_kwargs( self._metadata, 'Info', 'Labels', 'Ranges', 'LUTs', 'Plot', 'ROI', 'Overlays', 'Properties', 'info', 'labels', 'ranges', 'luts', 'plot', 'roi', 'overlays', 'prop', ) for t in imagej_metadata_tag(ijmetadata, byteorder): 
addtag(tags, *t) description = imagej_description( inputshape, rgb=storedshape.contig_samples in {3, 4}, colormaped=self._colormap is not None, **self._metadata, ) description += '\x00' * 64 # add buffer for in-place update elif self._tifffile and (metadata or metadata == {}): if self._truncate: self._metadata.update(truncated=True) description = shaped_description(inputshape, **self._metadata) description += '\x00' * 16 # add buffer for in-place update # elif metadata is None and self._truncate: # raise ValueError('cannot truncate without writing metadata') elif description is not None: if not isinstance(description, bytes): description = description.encode('ascii') self._descriptiontag = TiffTag( self, 0, 270, 2, len(description), description, 0 ) description = None if description is None: # disable shaped format if user disabled metadata self._tifffile = False else: description = description.encode('ascii') addtag(tags, 270, 2, 0, description, True) self._descriptiontag = TiffTag( self, 0, 270, 2, len(description), description, 0 ) del description if software is None: software = 'tifffile.py' if software: addtag(tags, 305, 2, 0, software, True) if datetime: if isinstance(datetime, str): if len(datetime) != 19 or datetime[16] != ':': raise ValueError('invalid datetime string') else: try: # assert isinstance(datetime, datetime.datetime) datetime = datetime.strftime( # type: ignore '%Y:%m:%d %H:%M:%S' ) except AttributeError: datetime = self._now().strftime('%Y:%m:%d %H:%M:%S') addtag(tags, 306, 2, 0, datetime, True) addtag(tags, 259, 3, 1, compressiontag) # Compression if compressiontag == 34887: # LERC if compressionargs is None or 'compression' not in compressionargs: lerc_compression = 0 elif compressionargs['compression'] is None: lerc_compression = 0 elif compressionargs['compression'] == 'deflate': lerc_compression = 1 elif compressionargs['compression'] == 'zstd': lerc_compression = 2 else: raise ValueError( 'invalid LERC compression ' 
f'{compressionargs["compression"]!r}' ) addtag(tags, 50674, 4, 2, (4, lerc_compression)) del lerc_compression if predictortag != 1: addtag(tags, 317, 3, 1, predictortag) addtag(tags, 256, 4, 1, storedshape.width) # ImageWidth addtag(tags, 257, 4, 1, storedshape.length) # ImageLength if tile: addtag(tags, 322, 4, 1, tile[-1]) # TileWidth addtag(tags, 323, 4, 1, tile[-2]) # TileLength if volumetric: addtag(tags, 32997, 4, 1, storedshape.depth) # ImageDepth if tile: addtag(tags, 32998, 4, 1, tile[0]) # TileDepth if subfiletype is not None: addtag(tags, 254, 4, 1, subfiletype) # NewSubfileType if (subifds or self._subifds) and self._subifdslevel < 0: if self._subifds: subifds = self._subifds elif hasattr(subifds, '__len__'): # allow TiffPage.subifds tuple subifds = len(subifds) # type: ignore else: subifds = int(subifds) # type: ignore self._subifds = subifds addtag( tags, 330, 18 if offsetsize > 4 else 13, subifds, [0] * subifds ) if not bilevel and not datadtype.kind == 'u': # SampleFormat sampleformat = {'u': 1, 'i': 2, 'f': 3, 'c': 6}[datadtype.kind] addtag( tags, 339, 3, storedshape.samples, (sampleformat,) * storedshape.samples, ) if colormap is not None: addtag(tags, 320, 3, colormap.size, colormap) if iccprofile is not None: addtag(tags, 34675, 7, len(iccprofile), iccprofile) addtag(tags, 277, 3, 1, storedshape.samples) if bilevel: # PlanarConfiguration if storedshape.samples > 1: addtag(tags, 284, 3, 1, storedshape.planarconfig) elif storedshape.samples > 1: # PlanarConfiguration addtag(tags, 284, 3, 1, storedshape.planarconfig) # BitsPerSample addtag( tags, 258, 3, storedshape.samples, (bitspersample,) * storedshape.samples, ) else: addtag(tags, 258, 3, 1, bitspersample) if storedshape.extrasamples > 0: if extrasamples is not None: if storedshape.extrasamples != len(extrasamples): raise ValueError( 'wrong number of extrasamples ' f'{storedshape.extrasamples} != {len(extrasamples)}' ) addtag(tags, 338, 3, len(extrasamples), extrasamples) elif photometric == 
RGB and storedshape.extrasamples == 1: # Unassociated alpha channel addtag(tags, 338, 3, 1, 2) else: # Unspecified alpha channel addtag( tags, 338, 3, storedshape.extrasamples, (0,) * storedshape.extrasamples, ) if jpegtables is not None: addtag(tags, 347, 7, len(jpegtables), jpegtables) if ( compressiontag == 7 and storedshape.planarconfig == 1 and photometric in {RGB, YCBCR} ): # JPEG compression with subsampling # TODO: use JPEGTables for multiple tiles or strips if subsampling is None: subsampling = (2, 2) elif subsampling not in {(1, 1), (2, 1), (2, 2), (4, 1)}: raise ValueError( f'invalid subsampling factors {subsampling!r}' ) maxsampling = max(subsampling) * 8 if tile and (tile[-1] % maxsampling or tile[-2] % maxsampling): raise ValueError(f'tile shape not a multiple of {maxsampling}') if storedshape.extrasamples > 1: raise ValueError('JPEG subsampling requires RGB(A) images') addtag(tags, 530, 3, 2, subsampling) # YCbCrSubSampling # use PhotometricInterpretation YCBCR by default outcolorspace = enumarg( PHOTOMETRIC, compressionargs.get('outcolorspace', 6) ) compressionargs['subsampling'] = subsampling compressionargs['colorspace'] = photometric.name compressionargs['outcolorspace'] = outcolorspace.name addtag(tags, 262, 3, 1, outcolorspace) if outcolorspace == YCBCR: # ReferenceBlackWhite is required for YCBCR if all(et[0] != 532 for et in extratags): addtag( tags, 532, 5, 6, (0, 1, 255, 1, 128, 1, 255, 1, 128, 1, 255, 1), ) else: if subsampling not in {None, (1, 1)}: logger().warning( f'{self!r} cannot apply subsampling {subsampling!r}' ) subsampling = None maxsampling = 1 addtag( tags, 262, 3, 1, photometric.value ) # PhotometricInterpretation if photometric == YCBCR: # YCbCrSubSampling and ReferenceBlackWhite addtag(tags, 530, 3, 2, (1, 1)) if all(et[0] != 532 for et in extratags): addtag( tags, 532, 5, 6, (0, 1, 255, 1, 128, 1, 255, 1, 128, 1, 255, 1), ) if resolutionunit is not None: resolutionunit = enumarg(RESUNIT, resolutionunit) elif self._imagej 
or resolution is None: resolutionunit = RESUNIT.NONE else: resolutionunit = RESUNIT.INCH if resolution is not None: addtag(tags, 282, 5, 1, rational(resolution[0])) # XResolution addtag(tags, 283, 5, 1, rational(resolution[1])) # YResolution if len(resolution) > 2: warnings.warn( " passing a unit along " "with the 'resolution' parameter is deprecated " "since 2022.7.28. Use the 'resolutionunit' parameter.", DeprecationWarning, stacklevel=2, ) unit = resolution[2] # type: ignore if unit is not None: resolutionunit = enumarg(RESUNIT, unit) addtag(tags, 296, 3, 1, resolutionunit) # ResolutionUnit else: addtag(tags, 282, 5, 1, (1, 1)) # XResolution addtag(tags, 283, 5, 1, (1, 1)) # YResolution addtag(tags, 296, 3, 1, resolutionunit) # ResolutionUnit # can save data array contiguous contiguous = not (compression or packints or bilevel) if tile: # one chunk per tile per plane if len(tile) == 2: tiles = ( (storedshape.length + tile[0] - 1) // tile[0], (storedshape.width + tile[1] - 1) // tile[1], ) contiguous = ( contiguous and storedshape.length == tile[0] and storedshape.width == tile[1] ) else: tiles = ( (storedshape.depth + tile[0] - 1) // tile[0], (storedshape.length + tile[1] - 1) // tile[1], (storedshape.width + tile[2] - 1) // tile[2], ) contiguous = ( contiguous and storedshape.depth == tile[0] and storedshape.length == tile[1] and storedshape.width == tile[2] ) numtiles = product(tiles) * storedshape.separate_samples databytecounts = [ product(tile) * storedshape.contig_samples * datadtype.itemsize ] * numtiles bytecountformat = self._bytecount_format( databytecounts, compressiontag ) addtag( tags, tagbytecounts, bytecountformat, numtiles, databytecounts ) addtag(tags, tagoffsets, offsetformat, numtiles, [0] * numtiles) bytecountformat = f'{numtiles}{bytecountformat}' if not contiguous: if dataarray is not None: dataiter = iter_tiles(dataarray, tile, tiles) elif dataiter is None and not ( compression or packints or bilevel ): def dataiter_( numtiles: int = 
numtiles * storedshape.frames, bytecount: int = databytecounts[0], ) -> Iterator[bytes]: # yield empty tiles chunk = bytes(bytecount) for _ in range(numtiles): yield chunk dataiter = dataiter_() rowsperstrip = 0 elif contiguous and ( rowsperstrip is None or rowsperstrip >= storedshape.length ): count = storedshape.separate_samples * storedshape.depth databytecounts = [ storedshape.length * storedshape.width * storedshape.contig_samples * datadtype.itemsize ] * count bytecountformat = self._bytecount_format( databytecounts, compressiontag ) addtag(tags, tagbytecounts, bytecountformat, count, databytecounts) addtag(tags, tagoffsets, offsetformat, count, [0] * count) addtag(tags, 278, 4, 1, storedshape.length) # RowsPerStrip bytecountformat = f'{count}{bytecountformat}' rowsperstrip = storedshape.length numstrips = count else: # use rowsperstrip rowsize = ( storedshape.width * storedshape.contig_samples * datadtype.itemsize ) if compressiontag == 48124: # Jetraw works on whole camera frame rowsperstrip = storedshape.length if rowsperstrip is None: # compress ~256 KB chunks by default # TIFF-EP requires <= 64 KB if compression: rowsperstrip = 262144 // rowsize else: rowsperstrip = storedshape.length if rowsperstrip < 1: rowsperstrip = maxsampling elif rowsperstrip > storedshape.length: rowsperstrip = storedshape.length elif subsampling and rowsperstrip % maxsampling: rowsperstrip = ( math.ceil(rowsperstrip / maxsampling) * maxsampling ) assert rowsperstrip is not None addtag(tags, 278, 4, 1, rowsperstrip) # RowsPerStrip numstrips1 = ( storedshape.length + rowsperstrip - 1 ) // rowsperstrip numstrips = ( numstrips1 * storedshape.separate_samples * storedshape.depth ) # TODO: save bilevel data with rowsperstrip stripsize = rowsperstrip * rowsize databytecounts = [stripsize] * numstrips laststripsize = stripsize - rowsize * ( numstrips1 * rowsperstrip - storedshape.length ) for i in range(numstrips1 - 1, numstrips, numstrips1): databytecounts[i] = laststripsize 
bytecountformat = self._bytecount_format( databytecounts, compressiontag ) addtag( tags, tagbytecounts, bytecountformat, numstrips, databytecounts ) addtag(tags, tagoffsets, offsetformat, numstrips, [0] * numstrips) bytecountformat = bytecountformat * numstrips if dataarray is not None and not contiguous: dataiter = iter_images(dataarray) if dataiter is None and not contiguous: raise ValueError('cannot write non-contiguous empty file') # add extra tags from user; filter duplicate and select tags extratag: TagTuple tagset = {t[0] for t in tags} tagset.update(TIFF.TAG_FILTERED) for extratag in extratags: if extratag[0] in tagset: logger().warning( f'{self!r} not writing extratag {extratag[0]}' ) else: addtag(tags, *extratag) del tagset del extratags # TODO: check TIFFReadDirectoryCheckOrder warning in files containing # multiple tags of same code # the entries in an IFD must be sorted in ascending order by tag code tags = sorted(tags, key=lambda x: x[0]) # define compress function compressionaxis: int = -2 bytesiter: bool = False iteritem: NDArray[Any] | bytes | None if dataiter is not None: iteritem, dataiter = peek_iterator(dataiter) bytesiter = isinstance(iteritem, bytes) if not bytesiter: iteritem = numpy.asarray(iteritem) if ( tile and storedshape.contig_samples == 1 and iteritem.shape[-1] != 1 ): # issue 185 compressionaxis = -1 if iteritem.dtype.char != datadtype.char: raise ValueError( f'dtype of iterator {iteritem.dtype!r} ' f'does not match dtype {datadtype!r}' ) else: iteritem = None if bilevel: if compressiontag == 1: def compressionfunc1(data, axis=compressionaxis) -> bytes: return numpy.packbits(data, axis=axis).tobytes() compressionfunc = compressionfunc1 elif compressiontag in {5, 32773, 8, 32946, 50013, 34925, 50000}: # LZW, PackBits, deflate, LZMA, ZSTD def compressionfunc2( data, compressor=TIFF.COMPRESSORS[compressiontag], axis=compressionaxis, kwargs=compressionargs, ) -> bytes: data = numpy.packbits(data, axis=axis).tobytes() return 
compressor(data, **kwargs) compressionfunc = compressionfunc2 else: raise NotImplementedError('cannot compress bilevel image') elif compression: compressor = TIFF.COMPRESSORS[compressiontag] if compressiontag == 32773: # PackBits compressionargs['axis'] = compressionaxis # elif compressiontag == 48124: # # Jetraw # imagecodecs.jetraw_init( # parameters=compressionargs.pop('parameters', None), # verbose=compressionargs.pop('verbose', None), # ) # if not 'identifier' in compressionargs: # raise ValueError( # "jetraw_encode() missing argument: 'identifier'" # ) if subsampling: # JPEG with subsampling def compressionfunc( data, compressor=compressor, kwargs=compressionargs ) -> bytes: return compressor(data, **kwargs) elif predictorfunc is not None: def compressionfunc( data, predictorfunc=predictorfunc, compressor=compressor, axis=compressionaxis, kwargs=compressionargs, ) -> bytes: data = predictorfunc(data, axis=axis) return compressor(data, **kwargs) elif compressionargs: def compressionfunc( data, compressor=compressor, kwargs=compressionargs ) -> bytes: return compressor(data, **kwargs) elif compressiontag > 1: compressionfunc = compressor else: compressionfunc = None elif packints: def compressionfunc( data, bps=bitspersample, axis=compressionaxis ) -> bytes: return imagecodecs.packints_encode(data, bps, axis=axis) else: compressionfunc = None del compression if not contiguous and not bytesiter and compressionfunc is not None: # create iterator of encoded tiles or strips bytesiter = True if tile: # dataiter yields tiles tileshape = tile + (storedshape.contig_samples,) # type: ignore tilesize = product(tileshape) * datadtype.itemsize maxworkers = TiffWriter._maxworkers( maxworkers, numtiles * storedshape.frames, tilesize, compressiontag, ) # yield encoded tiles dataiter = encode_chunks( numtiles * storedshape.frames, dataiter, # type: ignore compressionfunc, tileshape, datadtype, maxworkers, buffersize, True, ) else: # dataiter yields frames maxworkers = 
TiffWriter._maxworkers( maxworkers, numstrips * storedshape.frames, stripsize, compressiontag, ) # yield strips dataiter = iter_strips( dataiter, # type: ignore storedshape.page_shape, datadtype, rowsperstrip, ) # yield encoded strips dataiter = encode_chunks( numstrips * storedshape.frames, dataiter, compressionfunc, ( rowsperstrip, storedshape.width, storedshape.contig_samples, ), datadtype, maxworkers, buffersize, False, ) fhpos = fh.tell() # commented out to allow image data beyond 4GB in classic TIFF # if ( # not ( # offsetsize > 4 # or self._imagej or compressionfunc is not None # ) # and fhpos + datasize > 2**32 - 1 # ): # raise ValueError('data too large for classic TIFF format') dataoffset: int = 0 # if not compressed or multi-tiled, write the first IFD and then # all data contiguously; else, write all IFDs and data interleaved for pageindex in range(1 if contiguous else storedshape.frames): ifdpos = fhpos if ifdpos % 2: # position of IFD must begin on a word boundary fh.write(b'\x00') ifdpos += 1 if self._subifdslevel < 0: # update pointer at ifdoffset fh.seek(self._ifdoffset) fh.write(pack(offsetformat, ifdpos)) fh.seek(ifdpos) # create IFD in memory if pageindex < 2: subifdsoffsets = None ifd = io.BytesIO() ifd.write(pack(tagnoformat, len(tags))) tagoffset = ifd.tell() ifd.write(b''.join(t[1] for t in tags)) ifdoffset = ifd.tell() ifd.write(pack(offsetformat, 0)) # offset to next IFD # write tag values and patch offsets in ifdentries for tagindex, tag in enumerate(tags): offset = tagoffset + tagindex * tagsize + 4 + offsetsize code = tag[0] value = tag[2] if value: pos = ifd.tell() if pos % 2: # tag value is expected to begin on word boundary ifd.write(b'\x00') pos += 1 ifd.seek(offset) ifd.write(pack(offsetformat, ifdpos + pos)) ifd.seek(pos) ifd.write(value) if code == tagoffsets: dataoffsetsoffset = offset, pos elif code == tagbytecounts: databytecountsoffset = offset, pos elif code == 270: if ( self._descriptiontag is not None and 
self._descriptiontag.offset == 0 and value.startswith( self._descriptiontag.value ) ): self._descriptiontag.offset = ( ifdpos + tagoffset + tagindex * tagsize ) self._descriptiontag.valueoffset = ifdpos + pos elif code == 330: subifdsoffsets = offset, pos elif code == tagoffsets: dataoffsetsoffset = offset, None elif code == tagbytecounts: databytecountsoffset = offset, None elif code == 270: if ( self._descriptiontag is not None and self._descriptiontag.offset == 0 and self._descriptiontag.value in tag[1][-4:] ): self._descriptiontag.offset = ( ifdpos + tagoffset + tagindex * tagsize ) self._descriptiontag.valueoffset = ( self._descriptiontag.offset + offsetsize + 4 ) elif code == 330: subifdsoffsets = offset, None ifdsize = ifd.tell() if ifdsize % 2: ifd.write(b'\x00') ifdsize += 1 # write IFD later when strip/tile bytecounts and offsets are known fh.seek(ifdsize, os.SEEK_CUR) # write image data dataoffset = fh.tell() if align is None: align = 16 skip = (align - (dataoffset % align)) % align fh.seek(skip, os.SEEK_CUR) dataoffset += skip if contiguous: # write all image data contiguously if dataiter is not None: byteswritten = 0 if bytesiter: for iteritem in dataiter: # assert isinstance(iteritem, bytes) byteswritten += fh.write(iteritem) # type: ignore del iteritem else: pagesize = storedshape.page_size * datadtype.itemsize for iteritem in dataiter: if iteritem is None: byteswritten += fh.write_empty(pagesize) else: # assert isinstance(iteritem, numpy.ndarray) byteswritten += fh.write_array( iteritem, datadtype # type: ignore ) del iteritem if byteswritten != datasize: raise ValueError( 'iterator contains wrong number of bytes ' f'{byteswritten} != {datasize}' ) elif dataarray is None: fh.write_empty(datasize) else: fh.write_array(dataarray, datadtype) elif bytesiter: # write tiles or strips assert dataiter is not None for chunkindex in range(numtiles if tile else numstrips): iteritem = cast(bytes, next(dataiter)) # assert isinstance(iteritem, bytes) 
databytecounts[chunkindex] = len(iteritem) fh.write(iteritem) del iteritem elif tile: # write uncompressed tiles assert dataiter is not None tileshape = tile + (storedshape.contig_samples,) tilesize = product(tileshape) * datadtype.itemsize for tileindex in range(numtiles): iteritem = next(dataiter) if iteritem is None: databytecounts[tileindex] = 0 # fh.write_empty(tilesize) continue # assert not isinstance(iteritem, bytes) iteritem = numpy.ascontiguousarray(iteritem, datadtype) if iteritem.nbytes != tilesize: # if iteritem.dtype != datadtype: # raise ValueError( # 'dtype of tile does not match data' # ) if iteritem.nbytes > tilesize: raise ValueError('tile is too large') pad = tuple( (0, i - j) for i, j in zip(tileshape, iteritem.shape) ) iteritem = numpy.pad(iteritem, pad) fh.write_array(iteritem) del iteritem else: raise RuntimeError('unreachable code') # update strip/tile offsets assert dataoffsetsoffset is not None offset, pos = dataoffsetsoffset ifd.seek(offset) if pos is not None: ifd.write(pack(offsetformat, ifdpos + pos)) ifd.seek(pos) offset = dataoffset for size in databytecounts: ifd.write(pack(offsetformat, offset if size > 0 else 0)) offset += size else: ifd.write(pack(offsetformat, dataoffset)) if compressionfunc is not None or (tile and dataarray is None): # update strip/tile bytecounts assert databytecountsoffset is not None offset, pos = databytecountsoffset ifd.seek(offset) if pos is not None: ifd.write(pack(offsetformat, ifdpos + pos)) ifd.seek(pos) ifd.write(pack(bytecountformat, *databytecounts)) if subifdsoffsets is not None: # update and save pointer to SubIFDs tag values if necessary offset, pos = subifdsoffsets if pos is not None: ifd.seek(offset) ifd.write(pack(offsetformat, ifdpos + pos)) self._subifdsoffsets.append(ifdpos + pos) else: self._subifdsoffsets.append(ifdpos + offset) fhpos = fh.tell() fh.seek(ifdpos) fh.write(ifd.getbuffer()) fh.flush() if self._subifdslevel < 0: self._ifdoffset = ifdpos + ifdoffset else: # update SubIFDs 
def save(self, *args: Any, **kwargs: Any) -> None:
    """Write image data to file. Deprecated alias of ``write``.

    Emits a DeprecationWarning attributed to the caller, then forwards
    all positional and keyword arguments unchanged to ``self.write``.
    The return value of ``write`` is discarded.

    Parameters:
        *args: Positional arguments passed to ``self.write``.
        **kwargs: Keyword arguments passed to ``self.write``.

    """
    # the message names the deprecated function using the same '<name>'
    # convention as other messages in this file
    warnings.warn(
        '<tifffile.TiffWriter.save> is deprecated. Use TiffWriter.write',
        DeprecationWarning,
        stacklevel=2,
    )
    self.write(*args, **kwargs)
""" if self._descriptiontag is None: raise ValueError('no ImageDescription tag found') self._write_remaining_pages() self._descriptiontag.overwrite(description, erase=False) self._descriptiontag = None def close(self) -> None: """Write remaining pages and close file handle.""" try: if not self._truncate: self._write_remaining_pages() self._write_image_description() finally: try: self._fh.close() except Exception: pass @property def filehandle(self) -> FileHandle: """File handle to write file.""" return self._fh def _write_remaining_pages(self) -> None: """Write outstanding IFDs and tags to file.""" if not self._tags or self._truncate or self._datashape is None: return assert self._storedshape is not None assert self._databytecounts is not None assert self._dataoffset is not None pageno: int = self._storedshape.frames * self._datashape[0] - 1 if pageno < 1: self._tags = None self._dataoffset = None self._databytecounts = None return fh = self._fh fhpos: int = fh.tell() if fhpos % 2: fh.write(b'\x00') fhpos += 1 pack = struct.pack offsetformat: str = self.tiff.offsetformat offsetsize: int = self.tiff.offsetsize tagnoformat: str = self.tiff.tagnoformat tagsize: int = self.tiff.tagsize dataoffset: int = self._dataoffset pagedatasize: int = sum(self._databytecounts) subifdsoffsets: tuple[int, int | None] | None = None dataoffsetsoffset: tuple[int, int | None] pos: int | None offset: int # construct template IFD in memory # must patch offsets to next IFD and data before writing to file ifd = io.BytesIO() ifd.write(pack(tagnoformat, len(self._tags))) tagoffset = ifd.tell() ifd.write(b''.join(t[1] for t in self._tags)) ifdoffset = ifd.tell() ifd.write(pack(offsetformat, 0)) # offset to next IFD # tag values for tagindex, tag in enumerate(self._tags): offset = tagoffset + tagindex * tagsize + offsetsize + 4 code = tag[0] value = tag[2] if value: pos = ifd.tell() if pos % 2: # tag value is expected to begin on word boundary ifd.write(b'\x00') pos += 1 ifd.seek(offset) try: 
ifd.write(pack(offsetformat, fhpos + pos)) except Exception as exc: # struct.error if self._imagej: warnings.warn( f'{self!r} truncating ImageJ file', UserWarning ) self._truncate = True return raise ValueError( 'data too large for non-BigTIFF file' ) from exc ifd.seek(pos) ifd.write(value) if code == self._dataoffsetstag: # save strip/tile offsets for later updates dataoffsetsoffset = offset, pos elif code == 330: # save subifds offsets for later updates subifdsoffsets = offset, pos elif code == self._dataoffsetstag: dataoffsetsoffset = offset, None elif code == 330: subifdsoffsets = offset, None ifdsize = ifd.tell() if ifdsize % 2: ifd.write(b'\x00') ifdsize += 1 # check if all IFDs fit in file if offsetsize < 8 and fhpos + ifdsize * pageno > 2**32 - 32: if self._imagej: warnings.warn(f'{self!r} truncating ImageJ file', UserWarning) self._truncate = True return raise ValueError('data too large for non-BigTIFF file') # assemble IFD chain in memory from IFD template ifds = io.BytesIO(bytes(ifdsize * pageno)) ifdpos = fhpos for _ in range(pageno): # update strip/tile offsets in IFD dataoffset += pagedatasize # offset to image data offset, pos = dataoffsetsoffset ifd.seek(offset) if pos is not None: ifd.write(pack(offsetformat, ifdpos + pos)) ifd.seek(pos) offset = dataoffset for size in self._databytecounts: ifd.write(pack(offsetformat, offset)) offset += size else: ifd.write(pack(offsetformat, dataoffset)) if subifdsoffsets is not None: offset, pos = subifdsoffsets self._subifdsoffsets.append( ifdpos + (pos if pos is not None else offset) ) if self._subifdslevel < 0: if subifdsoffsets is not None: # update pointer to SubIFDs tag values if necessary offset, pos = subifdsoffsets if pos is not None: ifd.seek(offset) ifd.write(pack(offsetformat, ifdpos + pos)) # update pointer at ifdoffset to point to next IFD in file ifdpos += ifdsize ifd.seek(ifdoffset) ifd.write(pack(offsetformat, ifdpos)) else: # update SubIFDs tag values in file fh.seek( 
self._subifdsoffsets[self._ifdindex] + self._subifdslevel * offsetsize ) fh.write(pack(offsetformat, ifdpos)) # update SubIFD chain if self._subifdslevel == 0: self._nextifdoffsets.append(ifdpos + ifdoffset) else: fh.seek(self._nextifdoffsets[self._ifdindex]) fh.write(pack(offsetformat, ifdpos)) self._nextifdoffsets[self._ifdindex] = ifdpos + ifdoffset self._ifdindex += 1 self._ifdindex %= len(self._subifdsoffsets) ifdpos += ifdsize # write IFD entry ifds.write(ifd.getbuffer()) # terminate IFD chain ifdoffset += ifdsize * (pageno - 1) ifds.seek(ifdoffset) ifds.write(pack(offsetformat, 0)) # write IFD chain to file fh.seek(fhpos) fh.write(ifds.getbuffer()) if self._subifdslevel < 0: # update file to point to new IFD chain pos = fh.tell() fh.seek(self._ifdoffset) fh.write(pack(offsetformat, fhpos)) fh.flush() fh.seek(pos) self._ifdoffset = fhpos + ifdoffset self._tags = None self._dataoffset = None self._databytecounts = None # do not reset _storedshape, _datashape, _datadtype def _write_image_description(self) -> None: """Write metadata to ImageDescription tag.""" if self._datashape is None or self._descriptiontag is None: self._descriptiontag = None return assert self._storedshape is not None assert self._datadtype is not None if self._omexml is not None: if self._subifdslevel < 0: assert self._metadata is not None self._omexml.addimage( dtype=self._datadtype, shape=self._datashape[ 0 if self._datashape[0] != 1 else 1 : ], storedshape=self._storedshape.shape, **self._metadata, ) description = self._omexml.tostring(declaration=True) elif self._datashape[0] == 1: # description already up-to-date self._descriptiontag = None return # elif self._subifdslevel >= 0: # # don't write metadata to SubIFDs # return elif self._imagej: assert self._metadata is not None colormapped = self._colormap is not None isrgb = self._storedshape.samples in {3, 4} description = imagej_description( self._datashape, rgb=isrgb, colormaped=colormapped, **self._metadata, ) elif not 
self._tifffile: self._descriptiontag = None return else: assert self._metadata is not None description = shaped_description(self._datashape, **self._metadata) self._descriptiontag.overwrite(description.encode(), erase=False) self._descriptiontag = None def _addtag( self, tags: list[tuple[int, bytes, bytes | None, bool]], code: int | str, dtype: int | str, count: int | None, value: Any, writeonce: bool = False, /, ) -> None: """Append (code, ifdentry, ifdvalue, writeonce) to tags list. Compute ifdentry and ifdvalue bytes from code, dtype, count, value. """ pack = self._pack if not isinstance(code, int): code = TIFF.TAGS[code] try: datatype = cast(int, dtype) dataformat = TIFF.DATA_FORMATS[datatype][-1] except KeyError as exc: try: dataformat = cast(str, dtype) if dataformat[0] in '<>': dataformat = dataformat[1:] datatype = TIFF.DATA_DTYPES[dataformat] except (KeyError, TypeError): raise ValueError(f'unknown dtype {dtype}') from exc del dtype rawcount = count if datatype == 2: # string if isinstance(value, str): # enforce 7-bit ASCII on Unicode strings try: value = value.encode('ascii') except UnicodeEncodeError as exc: raise ValueError( 'TIFF strings must be 7-bit ASCII' ) from exc elif not isinstance(value, bytes): raise ValueError('TIFF strings must be 7-bit ASCII') if len(value) == 0 or value[-1:] != b'\x00': value += b'\x00' count = len(value) if code == 270: rawcount = int(value.find(b'\x00\x00')) if rawcount < 0: rawcount = count else: # length of string without buffer rawcount = max(self.tiff.offsetsize + 1, rawcount + 1) rawcount = min(count, rawcount) else: rawcount = count value = (value,) elif isinstance(value, bytes): # packed binary data itemsize = struct.calcsize(dataformat) if len(value) % itemsize: raise ValueError('invalid packed binary data') count = len(value) // itemsize rawcount = count elif count is None: raise ValueError('invalid count') else: count = int(count) if datatype in {5, 10}: # rational count *= 2 dataformat = dataformat[-1] 
ifdentry = [ pack('HH', code, datatype), pack(self.tiff.offsetformat, rawcount), ] ifdvalue = None if struct.calcsize(dataformat) * count <= self.tiff.offsetsize: # value(s) can be written directly valueformat = f'{self.tiff.offsetsize}s' if isinstance(value, bytes): ifdentry.append(pack(valueformat, value)) elif count == 1: if isinstance(value, (tuple, list, numpy.ndarray)): value = value[0] ifdentry.append(pack(valueformat, pack(dataformat, value))) else: ifdentry.append( pack(valueformat, pack(f'{count}{dataformat}', *value)) ) else: # use offset to value(s) ifdentry.append(pack(self.tiff.offsetformat, 0)) if isinstance(value, bytes): ifdvalue = value elif isinstance(value, numpy.ndarray): if value.size != count: raise RuntimeError('value.size != count') if value.dtype.char != dataformat: raise RuntimeError('value.dtype.char != dtype') ifdvalue = value.tobytes() elif isinstance(value, (tuple, list)): ifdvalue = pack(f'{count}{dataformat}', *value) else: ifdvalue = pack(dataformat, value) tags.append((code, b''.join(ifdentry), ifdvalue, writeonce)) def _pack(self, fmt: str, *val: Any) -> bytes: """Return values packed to bytes according to format.""" if fmt[0] not in '<>': fmt = self.tiff.byteorder + fmt return struct.pack(fmt, *val) def _bytecount_format( self, bytecounts: Sequence[int], compression: int, / ) -> str: """Return small bytecount format.""" if len(bytecounts) == 1: return self.tiff.offsetformat[1] bytecount = bytecounts[0] if compression > 1: bytecount = bytecount * 10 if bytecount < 2**16: return 'H' if bytecount < 2**32: return 'I' return self.tiff.offsetformat[1] def _now(self) -> datetime.datetime: """Return current date and time.""" return datetime.datetime.now() @staticmethod def _maxworkers( maxworkers: int | None, numchunks: int, chunksize: int, compression: int, ) -> int: """Return number of threads to encode segments.""" if maxworkers is not None: return maxworkers if ( imagecodecs is None or compression <= 1 or numchunks < 2 or chunksize 
< 1024 or compression == 48124 # Jetraw is not thread-safe? ): return 1 # the following is based on benchmarking RGB tile sizes vs maxworkers # using a (8228, 11500, 3) uint8 WSI slide: if chunksize < 131072 and compression in { 7, # JPEG 33007, # ALT_JPG 32773, # PackBits 34887, # LERC }: return 1 if chunksize < 32768 and compression in { 5, # LZW 8, # zlib 32946, # zlib 50000, # zstd 50013, # zlib/pixtiff }: # zlib, return 1 if chunksize < 8192 and compression in { 34934, # JPEG XR 22610, # JPEG XR 34933, # PNG }: return 1 if chunksize < 2048 and compression in { 33003, # JPEG2000 33004, # JPEG2000 33005, # JPEG2000 34712, # JPEG2000 50002, # JPEG XL 52546, # JPEG XL DNG }: return 1 if chunksize < 1024 and compression in { 34925, # LZMA 50001, # WebP }: return 1 if compression == 34887: # LERC # limit to 4 threads return min(numchunks, 4) return min(numchunks, TIFF.MAXWORKERS) def __enter__(self) -> TiffWriter: return self def __exit__(self, exc_type, exc_value, traceback) -> None: # type: ignore self.close() def __repr__(self) -> str: return f'' @final class TiffFile: """Read image and metadata from TIFF file. TiffFile instances must be closed with :py:meth:`TiffFile.close`, which is automatically called when using the 'with' context manager. TiffFile instances are not thread-safe. All attributes are read-only. Parameters: file: Specifies TIFF file to read. Open file objects must be positioned at the TIFF header. mode: File open mode if `file` is file name. The default is 'rb'. name: Name of file if `file` is file handle. offset: Start position of embedded file. The default is the current file position. size: Size of embedded file. The default is the number of bytes from the `offset` to the end of the file. omexml: OME metadata in XML format, for example, from external companion file or sanitized XML overriding XML in file. _multifile, _useframes, _parent: Internal use. 
**is_flags: Override `TiffFile.is_` flags, for example: ``is_ome=False``: disable processing of OME-XML metadata. ``is_lsm=False``: disable special handling of LSM files. ``is_ndpi=True``: force file to be NDPI format. Raises: TiffFileError: Invalid TIFF structure. """ tiff: TiffFormat """Properties of TIFF file format.""" pages: TiffPages """Sequence of pages in TIFF file.""" _fh: FileHandle _multifile: bool _parent: TiffFile # OME master file _files: dict[str | None, TiffFile] # cache of TiffFile instances _omexml: str | None # external OME-XML _decoders: dict[ # cache of TiffPage.decode functions int, Callable[ ..., tuple[ NDArray[Any] | None, tuple[int, int, int, int, int], tuple[int, int, int, int], ], ], ] def __init__( self, file: str | os.PathLike[Any] | FileHandle | BinaryIO, /, *, mode: Literal['r', 'r+'] | None = None, name: str | None = None, offset: int | None = None, size: int | None = None, omexml: str | None = None, _multifile: bool | None = None, _useframes: bool | None = None, _parent: TiffFile | None = None, **is_flags: bool | None, ) -> None: for key, value in is_flags.items(): if key[:3] == 'is_' and key[3:] in TIFF.FILE_FLAGS: if value is not None: setattr(self, key, bool(value)) else: raise TypeError(f'unexpected keyword argument: {key}') if mode not in {None, 'r', 'r+', 'rb', 'r+b'}: raise ValueError(f'invalid mode {mode!r}') self._omexml = None if omexml: if omexml.strip()[-4:] != 'OME>': raise ValueError('invalid OME-XML') self._omexml = omexml self.is_ome = True fh = FileHandle(file, mode=mode, name=name, offset=offset, size=size) self._fh = fh self._multifile = True if _multifile is None else bool(_multifile) self._files = {fh.name: self} self._decoders = {} self._parent = self if _parent is None else _parent try: fh.seek(0) header = fh.read(4) try: byteorder = {b'II': '<', b'MM': '>', b'EP': '<'}[header[:2]] except KeyError as exc: raise TiffFileError(f'not a TIFF file {header!r}') from exc version = struct.unpack(byteorder + 'H', 
header[2:4])[0] if version == 43: # BigTiff offsetsize, zero = struct.unpack(byteorder + 'HH', fh.read(4)) if zero != 0 or offsetsize != 8: raise TiffFileError( f'invalid BigTIFF offset size {(offsetsize, zero)}' ) if byteorder == '>': self.tiff = TIFF.BIG_BE else: self.tiff = TIFF.BIG_LE elif version == 42: # Classic TIFF if byteorder == '>': self.tiff = TIFF.CLASSIC_BE elif is_flags.get('is_ndpi', fh.extension == '.ndpi'): # NDPI uses 64 bit IFD offsets if is_flags.get('is_ndpi', True): self.tiff = TIFF.NDPI_LE else: self.tiff = TIFF.CLASSIC_LE else: self.tiff = TIFF.CLASSIC_LE elif version == 0x4E31: # NIFF if byteorder == '>': raise TiffFileError('invalid NIFF file') logger().error(f'{self!r} NIFF format not supported') self.tiff = TIFF.CLASSIC_LE elif version in {0x55, 0x4F52, 0x5352}: # Panasonic or Olympus RAW logger().error( f'{self!r} RAW format 0x{version:04X} not supported' ) if byteorder == '>': self.tiff = TIFF.CLASSIC_BE else: self.tiff = TIFF.CLASSIC_LE else: raise TiffFileError(f'invalid TIFF version {version}') # file handle is at offset to offset to first page self.pages = TiffPages(self) if self.is_lsm and ( self.filehandle.size >= 2**32 or self.pages[0].compression != 1 or self.pages[1].compression != 1 ): self._lsm_load_pages() elif self.is_scanimage and not self.is_bigtiff: # ScanImage <= 2015 try: self.pages._load_virtual_frames() except Exception as exc: logger().error( f'{self!r} ' f'raised {exc!r:.128}' ) elif self.is_ndpi: try: self._ndpi_load_pages() except Exception as exc: logger().error( f'{self!r} <_ndpi_load_pages> raised {exc!r:.128}' ) elif _useframes: self.pages.useframes = True except Exception: fh.close() raise @property def byteorder(self) -> Literal['>', '<']: """Byteorder of TIFF file.""" return self.tiff.byteorder @property def filehandle(self) -> FileHandle: """File handle.""" return self._fh @property def filename(self) -> str: """Name of file handle.""" return self._fh.name @cached_property def fstat(self) -> Any: 
"""Status of file handle's descriptor, if any.""" try: return os.fstat(self._fh.fileno()) except Exception: # io.UnsupportedOperation return None def close(self) -> None: """Close open file handle(s).""" for tif in self._files.values(): tif.filehandle.close() def asarray( self, key: int | slice | Iterable[int] | None = None, *, series: int | TiffPageSeries | None = None, level: int | None = None, squeeze: bool | None = None, out: OutputType = None, maxworkers: int | None = None, buffersize: int | None = None, ) -> NDArray[Any]: """Return images from select pages as NumPy array. By default, the image array from the first level of the first series is returned. Parameters: key: Specifies which pages to return as array. By default, the image of the specified `series` and `level` is returned. If not *None*, the images from the specified pages in the whole file (if `series` is *None*) or a specified series are returned as a stacked array. Requesting an array from multiple pages that are not compatible wrt. shape, dtype, compression etc. is undefined, that is, it may crash or return incorrect values. series: Specifies which series of pages to return as array. The default is 0. level: Specifies which level of multi-resolution series to return as array. The default is 0. squeeze: If *True*, remove all length-1 dimensions (except X and Y) from array. If *False*, single pages are returned as 5D array of shape :py:attr:`TiffPage.shaped`. For series, the shape of the returned array also includes singlet dimensions specified in some file formats. For example, ImageJ series and most commonly also OME series, are returned in TZCYXS order. By default, all but `"shaped"` series are squeezed. out: Specifies how image array is returned. By default, a new NumPy array is created. If a *numpy.ndarray*, a writable array to which the image is copied. If *'memmap'*, directly memory-map the image data in the file if possible; else create a memory-mapped array in a temporary file. 
If a *string* or *open file*, the file used to create a memory-mapped array. maxworkers: Maximum number of threads to concurrently decode data from multiple pages or compressed segments. If *None* or *0*, use up to :py:attr:`_TIFF.MAXWORKERS` threads. Reading data from file is limited to the main thread. Using multiple threads can significantly speed up this function if the bottleneck is decoding compressed data, for example, in case of large LZW compressed LSM files or JPEG compressed tiled slides. If the bottleneck is I/O or pure Python code, using multiple threads might be detrimental. buffersize: Approximate number of bytes to read from file in one pass. The default is :py:attr:`_TIFF.BUFFERSIZE`. Returns: Images from specified pages. See `TiffPage.asarray` for operations that are applied (or not) to the image data stored in the file. """ if not self.pages: return numpy.array([]) if key is None and series is None: series = 0 pages: Any # TiffPages | TiffPageSeries | list[TiffPage | TiffFrame] page0: TiffPage | TiffFrame | None if series is None: pages = self.pages else: if not isinstance(series, TiffPageSeries): series = self.series[series] if level is not None: series = series.levels[level] pages = series if key is None: pass elif series is None: pages = pages._getlist(key) elif isinstance(key, (int, numpy.integer)): pages = [pages[int(key)]] elif isinstance(key, slice): pages = pages[key] elif isinstance(key, collections.abc.Iterable) and not isinstance( key, str ): pages = [pages[k] for k in key] else: raise TypeError( f'key must be an integer, slice, or sequence, not {type(key)}' ) if pages is None or len(pages) == 0: raise ValueError('no pages selected') if ( key is None and series is not None and series.dataoffset is not None ): typecode = self.byteorder + series.dtype.char if ( series.keyframe.is_memmappable and isinstance(out, str) and out == 'memmap' ): # direct mapping shape = series.get_shape(squeeze) result = self.filehandle.memmap_array( typecode, 
def aszarr(
    self,
    key: int | None = None,
    *,
    series: int | TiffPageSeries | None = None,
    level: int | None = None,
    **kwargs: Any,
) -> ZarrTiffStore:
    """Return images from select pages as Zarr store.

    By default, the images from the first series, including all levels,
    are wrapped as a Zarr store.

    Parameters:
        key:
            Index of page in file (if `series` is None) or series to wrap
            as Zarr store.
            By default, a series is wrapped.
        series:
            Index of series to wrap as Zarr store.
            The default is 0 (if `key` is None).
        level:
            Index of pyramid level in series to wrap as Zarr store.
            By default, all levels are included as a multi-scale group.
        **kwargs:
            Additional arguments passed to :py:meth:`TiffPage.aszarr`
            or :py:meth:`TiffPageSeries.aszarr`.

    Raises:
        NotImplementedError: The file contains no pages.
        TypeError: `key` is neither None nor an integer.

    """
    if not self.pages:
        raise NotImplementedError('empty Zarr arrays not supported')

    source: Any
    if series is None:
        if key is None:
            # neither page nor series requested: wrap the first series
            return self.series[0].aszarr(level=level, **kwargs)
        source = self.pages
    else:
        if isinstance(series, TiffPageSeries):
            selected = series
        else:
            selected = self.series[series]
        if key is None:
            # whole series requested
            return selected.aszarr(level=level, **kwargs)
        source = selected if level is None else selected.levels[level]

    # a single page of the file or of the selected series/level
    if not isinstance(key, (int, numpy.integer)):
        raise TypeError('key must be an integer index')
    page: TiffPage | TiffFrame = source[key]
    return page.aszarr(**kwargs)
# clear pages cache since _series_ome() might leave # some frames without keyframe self.pages._clear() continue if kind == 'mmstack': # try OME, ImageJ, uniform continue break if not series: series = self._series_generic() self.pages.useframes = useframes self.pages.set_keyframe(keyframe) # remove empty series, for example, in MD Gel files # series = [s for s in series if product(s.shape) > 0] assert series is not None for i, s in enumerate(series): s._index = i return series def _series_uniform(self) -> list[TiffPageSeries] | None: """Return all images in file as single series.""" self.pages.useframes = True self.pages.set_keyframe(0) page = self.pages.first validate = not (page.is_scanimage or page.is_nih) pages = self.pages._getlist(validate=validate) if len(pages) == 1: shape = page.shape axes = page.axes else: shape = (len(pages),) + page.shape axes = 'I' + page.axes dtype = page.dtype return [TiffPageSeries(pages, shape, dtype, axes, kind='uniform')] def _series_generic(self) -> list[TiffPageSeries] | None: """Return image series in file. A series is a sequence of TiffPages with the same hash. 
""" pages = self.pages pages._clear(False) pages.useframes = False if pages.cache: pages._load() series = [] keys = [] seriesdict: dict[int, list[TiffPage | TiffFrame]] = {} def addpage(page: TiffPage | TiffFrame, /) -> None: # add page to seriesdict if not page.shape: # or product(page.shape) == 0: return key = page.hash if key in seriesdict: for p in seriesdict[key]: if p.offset == page.offset: break # remove duplicate page else: seriesdict[key].append(page) else: keys.append(key) seriesdict[key] = [page] for page in pages: addpage(page) if page.subifds is not None: for i, offset in enumerate(page.subifds): if offset < 8: continue try: self._fh.seek(offset) subifd = TiffPage(self, (page.index, i)) except Exception as exc: logger().warning( f'{self!r} generic series raised {exc!r:.128}' ) else: addpage(subifd) for key in keys: pagelist = seriesdict[key] page = pagelist[0] shape = (len(pagelist),) + page.shape axes = 'I' + page.axes if 'S' not in axes: shape += (1,) axes += 'S' series.append( TiffPageSeries( pagelist, shape, page.dtype, axes, kind='generic' ) ) self.is_uniform = len(series) == 1 if not self.is_agilent: pyramidize_series(series) return series def _series_shaped(self) -> list[TiffPageSeries] | None: """Return image series in tifffile "shaped" formatted file.""" # TODO: all series need to have JSON metadata for this to succeed def append( series: list[TiffPageSeries], pages: list[TiffPage | TiffFrame | None], axes: str | None, shape: tuple[int, ...] 
| None, reshape: tuple[int, ...], name: str, truncated: bool | None, /, ) -> None: # append TiffPageSeries to series assert isinstance(pages[0], TiffPage) page = pages[0] if not check_shape(page.shape, reshape): logger().warning( f'{self!r} shaped series metadata does not match ' f'page shape {page.shape} != {tuple(reshape)}' ) failed = True else: failed = False if failed or axes is None or shape is None: shape = page.shape axes = page.axes if len(pages) > 1: shape = (len(pages),) + shape axes = 'Q' + axes if failed: reshape = shape size = product(shape) resize = product(reshape) if page.is_contiguous and resize > size and resize % size == 0: if truncated is None: truncated = True axes = 'Q' + axes shape = (resize // size,) + shape try: axes = reshape_axes(axes, shape, reshape) shape = reshape except ValueError as exc: logger().error( f'{self!r} shaped series failed to reshape, ' f'raised {exc!r:.128}' ) series.append( TiffPageSeries( pages, shape, page.dtype, axes, name=name, kind='shaped', truncated=bool(truncated), squeeze=False, ) ) def detect_series( pages: TiffPages | list[TiffPage | TiffFrame | None], series: list[TiffPageSeries], /, ) -> list[TiffPageSeries] | None: shape: tuple[int, ...] | None reshape: tuple[int, ...] 
page: TiffPage | TiffFrame | None keyframe: TiffPage subifds: list[TiffPage | TiffFrame | None] = [] subifd: TiffPage | TiffFrame keysubifd: TiffPage axes: str | None name: str lenpages = len(pages) index = 0 while True: if index >= lenpages: break if isinstance(pages, TiffPages): # new keyframe; start of new series pages.set_keyframe(index) keyframe = cast(TiffPage, pages.keyframe) else: # pages is list of SubIFDs keyframe = cast(TiffPage, pages[0]) if keyframe.shaped_description is None: logger().error( f'{self!r} ' 'invalid shaped series metadata or corrupted file' ) return None # read metadata axes = None shape = None metadata = shaped_description_metadata( keyframe.shaped_description ) name = metadata.get('name', '') reshape = metadata['shape'] truncated = None if keyframe.subifds is None else False truncated = metadata.get('truncated', truncated) if 'axes' in metadata: axes = cast(str, metadata['axes']) if len(axes) == len(reshape): shape = reshape else: axes = '' logger().error( f'{self!r} shaped series axes do not match shape' ) # skip pages if possible spages: list[TiffPage | TiffFrame | None] = [keyframe] size = product(reshape) if size > 0: npages, mod = divmod(size, product(keyframe.shape)) else: npages = 1 mod = 0 if mod: logger().error( f'{self!r} ' 'shaped series shape does not match page shape' ) return None if 1 < npages <= lenpages - index: assert keyframe._dtype is not None size *= keyframe._dtype.itemsize if truncated: npages = 1 else: page = pages[index + 1] if ( keyframe.is_final and page is not None and keyframe.offset + size < page.offset and keyframe.subifds is None ): truncated = False else: # must read all pages for series truncated = False for j in range(index + 1, index + npages): page = pages[j] assert page is not None page.keyframe = keyframe spages.append(page) append(series, spages, axes, shape, reshape, name, truncated) index += npages # create series from SubIFDs if keyframe.subifds: subifds_size = len(keyframe.subifds) for i, 
offset in enumerate(keyframe.subifds): if offset < 8: continue subifds = [] for j, page in enumerate(spages): # if page.subifds is not None: try: if ( page is None or page.subifds is None or len(page.subifds) < subifds_size ): raise ValueError( f'{page!r} contains invalid subifds' ) self._fh.seek(page.subifds[i]) if j == 0: subifd = TiffPage(self, (page.index, i)) keysubifd = subifd else: subifd = TiffFrame( self, (page.index, i), keyframe=keysubifd, ) except Exception as exc: logger().error( f'{self!r} shaped series ' f'raised {exc!r:.128}' ) return None subifds.append(subifd) if subifds: series_or_none = detect_series(subifds, series) if series_or_none is None: return None series = series_or_none return series self.pages.useframes = True series = detect_series(self.pages, []) if series is None: return None self.is_uniform = len(series) == 1 pyramidize_series(series, isreduced=True) return series def _series_imagej(self) -> list[TiffPageSeries] | None: """Return image series in ImageJ file.""" # ImageJ's dimension order is TZCYXS # TODO: fix loading of color, composite, or palette images meta = self.imagej_metadata if meta is None: return None pages = self.pages pages.useframes = True pages.set_keyframe(0) page = self.pages.first order = meta.get('order', 'czt').lower() frames = meta.get('frames', 1) slices = meta.get('slices', 1) channels = meta.get('channels', 1) images = meta.get('images', 1) # not reliable if images < 1 or frames < 1 or slices < 1 or channels < 1: logger().warning( f'{self!r} ImageJ series metadata invalid or corrupted file' ) return None if channels == 1: images = frames * slices elif page.shaped[0] > 1 and page.shaped[0] == channels: # Bio-Formats declares separate samples as channels images = frames * slices elif images == frames * slices and page.shaped[4] == channels: # RGB contig samples declared as channel channels = 1 else: images = frames * slices * channels if images == 1 and pages.is_multipage: images = len(pages) nbytes = images * 
page.nbytes # ImageJ virtual hyperstacks store all image metadata in the first # page and image data are stored contiguously before the second # page, if any if not page.is_final: isvirtual = False elif page.dataoffsets[0] + nbytes > self.filehandle.size: logger().error( f'{self!r} ImageJ series metadata invalid or corrupted file' ) return None elif images <= 1: isvirtual = True elif ( pages.is_multipage and page.dataoffsets[0] + nbytes > pages[1].offset ): # next page is not stored after data isvirtual = False else: isvirtual = True page_list: list[TiffPage | TiffFrame] if isvirtual: # no need to read other pages page_list = [page] else: page_list = pages[:] shape: tuple[int, ...] axes: str if order in {'czt', 'default'}: axes = 'TZC' shape = (frames, slices, channels) elif order == 'ctz': axes = 'ZTC' shape = (slices, frames, channels) elif order == 'zct': axes = 'TCZ' shape = (frames, channels, slices) elif order == 'ztc': axes = 'CTZ' shape = (channels, frames, slices) elif order == 'tcz': axes = 'ZCT' shape = (slices, channels, frames) elif order == 'tzc': axes = 'CZT' shape = (channels, slices, frames) else: axes = 'TZC' shape = (frames, slices, channels) logger().warning( f'{self!r} ImageJ series of unknown order {order!r}' ) remain = images // product(shape) if remain > 1: logger().debug( f'{self!r} ImageJ series contains unidentified dimension' ) shape = (remain,) + shape axes = 'I' + axes if page.shaped[0] > 1: # Bio-Formats declares separate samples as channels assert axes[-1] == 'C' shape = shape[:-1] + page.shape axes += page.axes[1:] else: shape += page.shape axes += page.axes if 'S' not in axes: shape += (1,) axes += 'S' # assert axes.endswith('TZCYXS'), axes truncated = ( isvirtual and not pages.is_multipage and page.nbytes != nbytes ) self.is_uniform = True return [ TiffPageSeries( page_list, shape, page.dtype, axes, kind='imagej', truncated=truncated, ) ] def _series_nih(self) -> list[TiffPageSeries] | None: """Return all images in NIH Image file 
as single series.""" series = self._series_uniform() if series is not None: for s in series: s.kind = 'nih' return series def _series_scanimage(self) -> list[TiffPageSeries] | None: """Return image series in ScanImage file.""" pages = self.pages._getlist(validate=False) page = self.pages.first dtype = page.dtype shape = None meta = self.scanimage_metadata if meta is None: framedata = {} else: framedata = meta.get('FrameData', {}) if 'SI.hChannels.channelSave' in framedata: try: channels = framedata['SI.hChannels.channelSave'] try: # channelSave is a list of channel IDs channels = len(channels) except TypeError: # channelSave is a single channel ID channels = 1 # slices = framedata.get( # 'SI.hStackManager.actualNumSlices', # framedata.get('SI.hStackManager.numSlices', None), # ) # if slices is None: # raise ValueError('unable to determine numSlices') slices = None try: frames = int(framedata['SI.hStackManager.framesPerSlice']) except Exception as exc: # framesPerSlice is inf slices = 1 if len(pages) % channels: raise ValueError( 'unable to determine framesPerSlice' ) from exc frames = len(pages) // channels if slices is None: slices = max(len(pages) // (frames * channels), 1) shape = (slices, frames, channels) + page.shape axes = 'ZTC' + page.axes except Exception as exc: logger().warning( f'{self!r} ScanImage series raised {exc!r:.128}' ) # TODO: older versions of ScanImage store non-varying frame data in # the ImageDescription tag. Candidates are scanimage.SI5.channelsSave, # scanimage.SI5.stackNumSlices, scanimage.SI5.acqNumFrames # scanimage.SI4., state.acq.numberOfFrames, state.acq.numberOfFrames... 
def _series_mdgel(self) -> list[TiffPageSeries] | None:
    """Return image series in MD Gel file.

    Return None if the file has no MD Gel metadata. Otherwise return a
    single series containing the first page.

    If the 'FileTag' metadata value is 2 or 128, the series dtype is
    float32 and a transform is attached that scales the data by the
    'ScalePixel' rational (after squaring the data if 'FileTag' is 2).
    For any other 'FileTag', no transform is attached and the series
    uses the dtype of the page.

    """
    # only a single page, scaled according to metadata in second page
    meta = self.mdgel_metadata
    if meta is None:
        return None
    transform: Callable[[NDArray[Any]], NDArray[Any]] | None
    self.pages.useframes = False
    self.pages.set_keyframe(0)
    page = self.pages.first

    if meta['FileTag'] in {2, 128}:
        dtype = numpy.dtype('float32')
        scale = meta['ScalePixel']
        scale = scale[0] / scale[1]  # rational
        if meta['FileTag'] == 2:
            # square root data format
            def transform(a: NDArray[Any], /) -> NDArray[Any]:
                return a.astype('float32') ** 2 * scale

        else:

            def transform(a: NDArray[Any], /) -> NDArray[Any]:
                return a.astype('float32') * scale

    else:
        # unscaled data: keep the page dtype. Without this assignment,
        # dtype was unbound on this path and building the series raised
        # UnboundLocalError
        dtype = page.dtype
        transform = None

    self.is_uniform = False
    return [
        TiffPageSeries(
            [page],
            page.shape,
            dtype,
            page.axes,
            transform=transform,
            kind='mdgel',
        )
    ]
def _series_philips(self) -> list[TiffPageSeries] | None:
    """Return pyramidal image series in Philips DP file.

    The first returned series is named 'Baseline' and holds the first
    tiled page; further tiled pages are appended to its levels as
    'Level1', 'Level2', and so on. Pages whose description starts with
    'Macro' or 'Label', and other non-tiled pages ('Other'), follow as
    separate single-page series.

    Return None if the Philips metadata cannot be parsed as XML, if fewer
    than two DICOM_PIXEL_SPACING attributes are found, or if the number
    of tiled pages differs from the number of pixel spacings.

    The dimensions and shapes of the sublevel pages are modified in-place.

    """
    from xml.etree import ElementTree as etree

    # NOTE(review): this initial value is never used; series is rebound
    # after the padding fix below
    series = []
    pages = self.pages
    pages.cache = False
    pages.useframes = False
    pages.set_keyframe(0)
    pages._load()

    meta = self.philips_metadata
    assert meta is not None

    try:
        tree = etree.fromstring(meta)
    except etree.ParseError as exc:
        logger().error(f'{self!r} Philips series raised {exc!r:.128}')
        return None

    # one tuple of floats per DICOM_PIXEL_SPACING attribute; double quotes
    # are stripped from the attribute text before splitting on whitespace
    pixel_spacing = [
        tuple(float(v) for v in elem.text.replace('"', '').split())
        for elem in tree.findall(
            './/*'
            '/DataObject[@ObjectType="PixelDataRepresentation"]'
            '/Attribute[@Name="DICOM_PIXEL_SPACING"]'
        )
        if elem.text is not None
    ]
    if len(pixel_spacing) < 2:
        logger().error(
            f'{self!r} Philips series {len(pixel_spacing)=} < 2'
        )
        return None

    # classify pages: a later 'Macro' or 'Label' page replaces an earlier
    # one, while 'Other' and 'Level' pages accumulate in file order
    series_dict: dict[str, list[TiffPage]] = {}
    series_dict['Level'] = []
    series_dict['Other'] = []
    for page in pages:
        assert isinstance(page, TiffPage)
        if page.description.startswith('Macro'):
            series_dict['Macro'] = [page]
        elif page.description.startswith('Label'):
            series_dict['Label'] = [page]
        elif not page.is_tiled:
            series_dict['Other'].append(page)
        else:
            series_dict['Level'].append(page)

    # levels are removed from series_dict so that only the auxiliary
    # images remain for the final loop
    levels = series_dict.pop('Level')
    if len(levels) != len(pixel_spacing):
        logger().error(
            f'{self!r} Philips series '
            f'{len(levels)=} != {len(pixel_spacing)=}'
        )
        return None

    # fix padding of sublevels
    # expected sublevel dimensions are the baseline dimensions divided by
    # the rounded ratio of the sublevel to the baseline pixel spacing; the
    # first spacing value scales the image length, the second the width
    imagewidth0 = levels[0].imagewidth
    imagelength0 = levels[0].imagelength
    h0, w0 = pixel_spacing[0]
    for serie, (h, w) in zip(levels[1:], pixel_spacing[1:]):
        page = serie.keyframe
        # if page.dtype.itemsize == 1:
        #     page.nodata = 255

        imagewidth = imagewidth0 // int(round(w / w0))
        imagelength = imagelength0 // int(round(h / h0))

        if page.imagewidth - page.tilewidth >= imagewidth:
            # stored width exceeds the expected width by a tile or more:
            # warn and reduce the width by one less than the tile width
            logger().warning(
                f'{self!r} Philips series {page.index=} '
                f'{page.imagewidth=}-{page.tilewidth=} >= {imagewidth=}'
            )
            page.imagewidth -= page.tilewidth - 1
        elif page.imagewidth < imagewidth:
            # stored width is smaller than expected: warn and keep it
            logger().warning(
                f'{self!r} Philips series {page.index=} '
                f'{page.imagewidth=} < {imagewidth=}'
            )
        else:
            page.imagewidth = imagewidth
        # continue with the width that was actually set on the page
        imagewidth = page.imagewidth

        if page.imagelength - page.tilelength >= imagelength:
            logger().warning(
                f'{self!r} Philips series {page.index=} '
                f'{page.imagelength=}-{page.tilelength=} >= {imagelength=}'
            )
            page.imagelength -= page.tilelength - 1
        # elif page.imagelength < imagelength:
        #    # in this case image is padded with zero
        else:
            page.imagelength = imagelength
        imagelength = page.imagelength

        # update the page shape; page.shaped[-1] and page.shaped[0] are
        # tested to decide whether a trailing or leading dimension of the
        # existing shape is retained
        if page.shaped[-1] > 1:
            page.shape = (imagelength, imagewidth, page.shape[-1])
        elif page.shaped[0] > 1:
            page.shape = (page.shape[0], imagelength, imagewidth)
        else:
            page.shape = (imagelength, imagewidth)
        page.shaped = (
            page.shaped[:2] + (imagelength, imagewidth) + page.shaped[-1:]
        )

    series = [TiffPageSeries([levels[0]], name='Baseline', kind='philips')]
    for i, page in enumerate(levels[1:]):
        series[0].levels.append(
            TiffPageSeries([page], name=f'Level{i + 1}', kind='philips')
        )
    # remaining entries are appended in insertion order of series_dict
    for key, value in series_dict.items():
        for page in value:
            series.append(TiffPageSeries([page], name=key, kind='philips'))

    self.is_uniform = False
    return series
def _series_sis(self) -> list[TiffPageSeries] | None:
    """Return image series in Olympus SIS file.

    Return None if the file has no SIS metadata. If the metadata contain
    both 'shape' and 'axes', they are prepended to the shape and axes of
    the first page. Otherwise the number of pages is prepended as an 'I'
    dimension.

    """
    metadata = self.sis_metadata
    if metadata is None:
        return None

    # TODO: this fails for VSI
    pagelist = self.pages._getlist(validate=False)
    keypage = pagelist[0]

    if 'shape' in metadata and 'axes' in metadata:
        leading_shape = metadata['shape']
        leading_axes = metadata['axes']
    else:
        leading_shape = (len(pagelist),)
        leading_axes = 'I'
    shape = leading_shape + keypage.shape
    axes = leading_axes + keypage.axes

    self.is_uniform = True
    return [TiffPageSeries(pagelist, shape, keypage.dtype, axes, kind='sis')]
def _series_svs(self) -> list[TiffPageSeries] | None:
    """Return image series in Aperio SVS file.

    Return None if the first page is not tiled. A single-page file yields
    only a 'Baseline' series. Otherwise the 'Baseline' series, with pages
    of other shapes attached as 'Resolution' levels, is followed by the
    'Thumbnail' series (second page) and up to two trailing 'Label' or
    'Macro' series.

    """
    pages = self.pages
    if not pages.first.is_tiled:
        return None

    pages.cache = True
    pages.useframes = False
    pages.set_keyframe(0)
    pages._load()

    def single(page, name: str, /):
        # series of one page, shaped like the page
        return TiffPageSeries(
            [page], page.shape, page.dtype, page.axes, name=name, kind='svs'
        )

    def stack(planes, depth: int, name: str, /):
        # series with leading Z dimension, described by its first page
        key = planes[0]
        return TiffPageSeries(
            planes,
            (depth,) + key.shape,
            key.dtype,
            'Z' + key.axes,
            name=name,
            kind='svs',
        )

    # baseline
    basepage = pages.first
    if len(pages) == 1:
        self.is_uniform = False
        return [single(basepage, 'Baseline')]

    # thumbnail
    thumbnail = single(pages[1], 'Thumbnail')

    # resolutions and focal planes: group tiled, non-reduced pages by shape
    by_shape = {basepage.shape: [basepage]}
    index = 2
    while index < len(pages):
        candidate = cast(TiffPage, pages[index])
        if not (candidate.is_tiled and not candidate.is_reduced):
            break
        by_shape.setdefault(candidate.shape, []).append(candidate)
        index += 1

    baseplanes = by_shape[basepage.shape]
    zsize = len(baseplanes)
    if any(len(group) != zsize for group in by_shape.values()):
        logger().warning(f'{self!r} SVS series focal planes do not match')
        zsize = 1
    baseline = stack(baseplanes, zsize, 'Baseline')

    for shape, group in by_shape.items():
        if shape != basepage.shape:
            baseline.levels.append(stack(group, zsize, 'Resolution'))

    series = [baseline, thumbnail]

    # Label, Macro; subfiletype 1, 9
    for _ in range(2):
        if index == len(pages):
            break
        extra = pages[index]
        label = 'Macro' if extra.subfiletype == 9 else 'Label'
        series.append(single(extra, label))
        index += 1

    self.is_uniform = False
    return series
def _series_bif(self) -> list[TiffPageSeries] | None:
    """Return image series in Ventana/Roche BIF file.

    Pages are classified by their description. The first page whose
    description contains 'level' becomes the 'Baseline' series, which is
    placed first in the returned list; later such pages are attached to
    it as 'Resolution' levels. 'Label', 'Thumbnail' (also used for
    'Probability' pages), and 'Unknown' pages become separate series in
    file order. A warning that tiles are not stitched is always logged.

    """
    pages = self.pages
    pages.cache = True
    pages.useframes = False
    pages.set_keyframe(0)
    pages._load()

    def single(page, name: str, /):
        # series of one page, shaped like the page
        return TiffPageSeries(
            [page], page.shape, page.dtype, page.axes, name=name, kind='bif'
        )

    result = []
    base = None
    for item in pages:
        page = cast(TiffPage, item)
        text = page.description
        if text.startswith('Label'):
            result.append(single(page, 'Label'))
            continue
        if text == 'Thumbnail' or text.startswith('Probability'):
            result.append(single(page, 'Thumbnail'))
            continue
        if 'level' not in text:
            # TODO: is this necessary?
            result.append(single(page, 'Unknown'))
            continue
        if base is None:
            # first level page: baseline always leads the series list
            base = single(page, 'Baseline')
            result.insert(0, base)
        else:
            base.levels.append(single(page, 'Resolution'))

    logger().warning(f'{self!r} BIF series tiles are not stiched')
    self.is_uniform = False
    return result
step = float(along.attrib.get('Step', 1)) start = float(along.attrib['Start']) stop = float(along.attrib['End']) + step labels = len( numpy.arange(start, stop, step) ) else: labels = len( [ label for label in along if label.tag.endswith('Label') ] ) mod[axis] = (newaxis, labels) if not element.tag.endswith('Image'): continue for annot in element: if annot.tag.endswith('AnnotationRef'): annotationref = annot.attrib['ID'] break else: annotationref = None attr = element.attrib name = attr.get('Name', None) for pixels in element: if not pixels.tag.endswith('Pixels'): continue attr = pixels.attrib # dtype = attr.get('PixelType', None) axes = ''.join(reversed(attr['DimensionOrder'])) shape = [int(attr['Size' + ax]) for ax in axes] ifds = [] spp = 1 # samples per pixel first = True for data in pixels: if data.tag.endswith('Channel'): attr = data.attrib if first: first = False spp = int(attr.get('SamplesPerPixel', spp)) if spp > 1: # correct channel dimension for spp shape = [ shape[i] // spp if ax == 'C' else shape[i] for i, ax in enumerate(axes) ] elif int(attr.get('SamplesPerPixel', 1)) != spp: raise ValueError( 'OME series cannot handle differing ' 'SamplesPerPixel' ) continue if not data.tag.endswith('TiffData'): continue attr = data.attrib ifd_index = int(attr.get('IFD', 0)) num = int(attr.get('NumPlanes', 1 if 'IFD' in attr else 0)) num = int(attr.get('PlaneCount', num)) idxs = [int(attr.get('First' + ax, 0)) for ax in axes[:-2]] try: idx = int(numpy.ravel_multi_index(idxs, shape[:-2])) except ValueError as exc: # ImageJ produces invalid ome-xml when cropping logger().warning( f'{self!r} ' 'OME series contains invalid TiffData index, ' f'raised {exc!r:.128}', ) continue for uuid in data: if not uuid.tag.endswith('UUID'): continue if ( root_uuid is None and uuid.text is not None and ( uuid.attrib.get('FileName', '').lower() == self.filename.lower() ) ): # no global UUID, use this file root_uuid = uuid.text self._files[root_uuid] = self._files[None] del 
self._files[None] elif uuid.text not in self._files: if not self._multifile: # abort reading multifile OME series # and fall back to generic series return [] fname = uuid.attrib['FileName'] try: if not self.filehandle.is_file: raise ValueError tif = TiffFile( os.path.join(dirname, fname), _parent=self ) load_pages(tif) except ( OSError, FileNotFoundError, ValueError, ) as exc: if files_missing == 0: logger().warning( f'{self!r} OME series failed to read ' f'{fname!r}, raised {exc!r:.128}. ' 'Missing data are zeroed' ) files_missing += 1 # assume that size is same as in previous file # if no NumPlanes or PlaneCount are given if num: size = num elif size == -1: raise ValueError( 'OME series missing ' 'NumPlanes or PlaneCount' ) from exc ifds.extend([None] * (size + idx - len(ifds))) break self._files[uuid.text] = tif tif.close() pages = self._files[uuid.text].pages try: size = num if num else len(pages) ifds.extend([None] * (size + idx - len(ifds))) for i in range(size): ifds[idx + i] = pages[ifd_index + i] except IndexError as exc: logger().warning( f'{self!r} ' 'OME series contains index out of range, ' f'raised {exc!r:.128}' ) # only process first UUID break else: # no uuid found pages = self.pages try: size = num if num else len(pages) ifds.extend([None] * (size + idx - len(ifds))) for i in range(size): ifds[idx + i] = pages[ifd_index + i] except IndexError as exc: logger().warning( f'{self!r} ' 'OME series contains index out of range, ' f'raised {exc!r:.128}' ) if not ifds or all(i is None for i in ifds): # skip images without data continue # find a keyframe for ifd in ifds: # try find a TiffPage if ifd is not None and ifd == ifd.keyframe: keyframe = cast(TiffPage, ifd) break else: # reload a TiffPage from file for i, ifd in enumerate(ifds): if ifd is not None: isclosed = ifd.parent.filehandle.closed if isclosed: ifd.parent.filehandle.open() ifd.parent.pages.set_keyframe(ifd.index) keyframe = cast( TiffPage, ifd.parent.pages[ifd.index] ) ifds[i] = keyframe if 
isclosed: keyframe.parent.filehandle.close() break # does the series spawn multiple files multifile = False for ifd in ifds: if ifd and ifd.parent != keyframe.parent: multifile = True break if spp > 1: if keyframe.planarconfig == 1: shape += [spp] axes += 'S' else: shape = shape[:-2] + [spp] + shape[-2:] axes = axes[:-2] + 'S' + axes[-2:] if 'S' not in axes: shape += [1] axes += 'S' # number of pages in the file might mismatch XML metadata, for # example Nikon-cell011.ome.tif or stack_t24_y2048_x2448.tiff size = max(product(shape) // keyframe.size, 1) if size < len(ifds): logger().warning( f'{self!r} ' f'OME series expected {size} frames, got {len(ifds)}' ) ifds = ifds[:size] elif size > len(ifds): logger().warning( f'{self!r} ' f'OME series is missing {size - len(ifds)} frames.' ' Missing data are zeroed' ) ifds.extend([None] * (size - len(ifds))) # FIXME: this implementation assumes the last dimensions are # stored in TIFF pages. Apparently that is not always the case. # For example, TCX (20000, 2, 500) is stored in 2 pages of # (20000, 500) in 'Image 7.ome_h00.tiff'. # For now, verify that shapes of keyframe and series match. # If not, skip series. 
squeezed = _squeeze_axes(shape, axes)[0] if keyframe.shape != tuple(squeezed[-len(keyframe.shape) :]): logger().warning( f'{self!r} OME series cannot handle discontiguous ' f'storage ({keyframe.shape} != ' f'{tuple(squeezed[-len(keyframe.shape) :])})', ) del ifds continue # set keyframe on all IFDs # each series must contain a TiffPage used as keyframe keyframes: dict[str, TiffPage] = { keyframe.parent.filehandle.name: keyframe } for i, page in enumerate(ifds): if page is None: continue fh = page.parent.filehandle if fh.name not in keyframes: if page.keyframe != page: # reload TiffPage from file isclosed = fh.closed if isclosed: fh.open() page.parent.pages.set_keyframe(page.index) page = page.parent.pages[page.index] ifds[i] = page if isclosed: fh.close() keyframes[fh.name] = cast(TiffPage, page) if page.keyframe != page: page.keyframe = keyframes[fh.name] moduloref.append(annotationref) series.append( TiffPageSeries( ifds, shape, keyframe.dtype, axes, parent=self, name=name, multifile=multifile, kind='ome', ) ) del ifds if files_missing > 1: logger().warning( f'{self!r} OME series failed to read {files_missing} files' ) # apply modulo according to AnnotationRef for aseries, annotationref in zip(series, moduloref): if annotationref not in modulo: continue shape = list(aseries.get_shape(False)) axes = aseries.get_axes(False) for axis, (newaxis, size) in modulo[annotationref].items(): i = axes.index(axis) if shape[i] == size: axes = axes.replace(axis, newaxis, 1) else: shape[i] //= size shape.insert(i + 1, size) axes = axes.replace(axis, axis + newaxis, 1) aseries._set_dimensions(shape, axes, None) # pyramids for aseries in series: keyframe = aseries.keyframe if keyframe.subifds is None: continue if len(self._files) > 1: # TODO: support multi-file pyramids; must re-open/close logger().warning( f'{self!r} OME series cannot read multi-file pyramids' ) break for level in range(len(keyframe.subifds)): found_keyframe = False ifds = [] for page in aseries.pages: if ( page 
is None or page.subifds is None or page.subifds[level] < 8 ): ifds.append(None) continue page.parent.filehandle.seek(page.subifds[level]) if page.keyframe == page: ifd = keyframe = TiffPage( self, (page.index, level + 1) ) found_keyframe = True elif not found_keyframe: raise RuntimeError('no keyframe found') else: ifd = TiffFrame( self, (page.index, level + 1), keyframe=keyframe ) ifds.append(ifd) if all(ifd_or_none is None for ifd_or_none in ifds): logger().warning( f'{self!r} OME series level {level + 1} is empty' ) break # fix shape shape = list(aseries.get_shape(False)) axes = aseries.get_axes(False) for i, ax in enumerate(axes): if ax == 'X': shape[i] = keyframe.imagewidth elif ax == 'Y': shape[i] = keyframe.imagelength # add series aseries.levels.append( TiffPageSeries( ifds, tuple(shape), keyframe.dtype, axes, parent=self, name=f'level {level + 1}', kind='ome', ) ) self.is_uniform = len(series) == 1 and len(series[0].levels) == 1 return series def _series_mmstack(self) -> list[TiffPageSeries] | None: """Return series in Micro-Manager stack file(s).""" settings = self.micromanager_metadata if ( settings is None or 'Summary' not in settings or 'IndexMap' not in settings ): return None pages: list[TiffPage | TiffFrame | None] page_count: int summary = settings['Summary'] indexmap = settings['IndexMap'] indexmap = indexmap[indexmap[:, 4].argsort()] if 'MicroManagerVersion' not in summary or 'Frames' not in summary: # TODO: handle MagellanStack? return None # determine CZTR shape from indexmap; TODO: is this necessary? indexmap_shape = (numpy.max(indexmap[:, :4], axis=0) + 1).tolist() indexmap_index = {'C': 0, 'Z': 1, 'T': 2, 'R': 3} # TODO: activate this? 
# if 'AxisOrder' in summary: # axesorder = summary['AxisOrder'] # keys = { # 'channel': 'C', # 'z': 'Z', # 'slice': 'Z', # 'position': 'R', # 'time': 'T', # } # axes = ''.join(keys[ax] for ax in reversed(axesorder)) axes = 'TR' if summary.get('TimeFirst', True) else 'RT' axes += 'ZC' if summary.get('SlicesFirst', True) else 'CZ' keys = { 'C': 'Channels', 'Z': 'Slices', 'R': 'Positions', 'T': 'Frames', } shape = tuple( max( indexmap_shape[indexmap_index[ax]], int(summary.get(keys[ax], 1)), ) for ax in axes ) size = product(shape) indexmap_order = tuple(indexmap_index[ax] for ax in axes) def add_file(tif: TiffFile, indexmap: NDArray[Any]) -> int: # add virtual TiffFrames to pages list page_count = 0 offsets = indexmap[:, 4].tolist() indices = numpy.ravel_multi_index( # type: ignore indexmap[:, indexmap_order].T, shape ).tolist() keyframe = tif.pages.first filesize = tif.filehandle.size - keyframe.databytecounts[0] - 162 index: int offset: int for index, offset in zip(indices, offsets): if offset == keyframe.offset: pages[index] = keyframe page_count += 1 continue if 0 < offset <= filesize: dataoffsets = (offset + 162,) databytecounts = keyframe.databytecounts page_count += 1 else: # assume file is truncated dataoffsets = databytecounts = (0,) offset = 0 pages[index] = TiffFrame( tif, index=index, offset=offset, dataoffsets=dataoffsets, databytecounts=databytecounts, keyframe=keyframe, ) return page_count multifile = size > indexmap.shape[0] if multifile: # get multifile prefix if not self.filehandle.is_file: logger().warning( f'{self!r} MMStack multi-file series cannot be read from ' f'{self.filehandle._fh!r}' ) multifile = False elif '_MMStack' not in self.filename: logger().warning(f'{self!r} MMStack file name is invalid') multifile = False elif 'Prefix' in summary: prefix = summary['Prefix'] if not self.filename.startswith(prefix): logger().warning(f'{self!r} MMStack file name is invalid') multifile = False else: prefix = self.filename.split('_MMStack')[0] if 
multifile: # read other files pattern = os.path.join( self.filehandle.dirname, prefix + '_MMStack*.tif' ) filenames = glob.glob(pattern) if len(filenames) == 1: multifile = False else: pages = [None] * size page_count = add_file(self, indexmap) for fname in filenames: if self.filename == os.path.split(fname)[-1]: continue with TiffFile(fname) as tif: indexmap = read_micromanager_metadata( tif.filehandle, {'IndexMap'} )['IndexMap'] indexmap = indexmap[indexmap[:, 4].argsort()] page_count += add_file(tif, indexmap) if multifile: pass elif size > indexmap.shape[0]: # other files missing: squeeze shape old_shape = shape min_index = numpy.min(indexmap[:, :4], axis=0) max_index = numpy.max(indexmap[:, :4], axis=0) indexmap = indexmap.copy() indexmap[:, :4] -= min_index shape = tuple( j - i + 1 for i, j in zip(min_index.tolist(), max_index.tolist()) ) shape = tuple(shape[i] for i in indexmap_order) size = product(shape) pages = [None] * size page_count = add_file(self, indexmap) logger().warning( f'{self!r} MMStack series is missing files. ' f'Returning subset {shape!r} of {old_shape!r}' ) else: # single file pages = [None] * size page_count = add_file(self, indexmap) if page_count != size: logger().warning( f'{self!r} MMStack is missing {size - page_count} pages.' 
def _series_ndtiff(self) -> list[TiffPageSeries] | None:
    """Return series in NDTiff v2 and v3 files.

    The series is assembled from the ``NDTiff.index`` file located next to
    this file. The first page of each referenced TIFF file is used as
    keyframe; all other images are virtual TiffFrames whose data offsets
    are taken from the index.

    Returns:
        List containing one TiffPageSeries of kind 'ndtiff', or None if
        the file handle is not a regular file, the index file does not
        exist, or the index contains no entries.

    Raises:
        ValueError:
            An index entry does not match the shape, compression, or dtype
            of its keyframe, or uses an unknown pixel type.

    """
    # TODO: implement fallback for missing index file, versions 0 and 1
    if not self.filehandle.is_file:
        logger().warning(
            f'{self!r} NDTiff.index not found for {self.filehandle._fh!r}'
        )
        return None

    indexfile = os.path.join(self.filehandle.dirname, 'NDTiff.index')
    if not os.path.exists(indexfile):
        logger().warning(f'{self!r} NDTiff.index not found')
        return None

    keyframes: dict[str, TiffPage] = {}
    shape: tuple[int, ...]
    dims: tuple[str, ...]
    page: TiffPage | TiffFrame
    pageindex = 0
    pixel_types = {
        0: ('uint8', 8),  # 8bit monochrome
        1: ('uint16', 16),  # 16bit monochrome
        2: ('uint8', 8),  # 8bit RGB
        3: ('uint16', 10),  # 10bit monochrome
        4: ('uint16', 12),  # 12bit monochrome
        5: ('uint16', 14),  # 14bit monochrome
        6: ('uint16', 11),  # 11bit monochrome
    }

    indices: dict[tuple[int, ...], TiffPage | TiffFrame] = {}
    categories: dict[str, dict[str, int]] = {}
    first = True

    for (
        axes_dict,
        filename,
        dataoffset,
        width,
        height,
        pixeltype,
        compression,
        metaoffset,
        metabytecount,
        metacompression,
    ) in read_ndtiff_index(indexfile):
        if filename in keyframes:
            # create virtual frame from index
            pageindex += 1  # TODO
            keyframe = keyframes[filename]
            page = TiffFrame(
                keyframe.parent,
                pageindex,
                offset=None,  # virtual frame
                keyframe=keyframe,
                dataoffsets=(dataoffset,),
                databytecounts=keyframe.databytecounts,
            )
            if page.shape[:2] != (height, width):
                raise ValueError(
                    'NDTiff.index does not match TIFF shape '
                    f'{page.shape[:2]} != {(height, width)}'
                )
            if compression != 0:
                # f-string is required to report the offending value
                raise ValueError(
                    f'NDTiff.index compression {compression} not supported'
                )
            if page.compression != 1:
                raise ValueError(
                    'NDTiff.index does not match TIFF compression '
                    f'{page.compression!r}'
                )
            if pixeltype not in pixel_types:
                raise ValueError(
                    f'NDTiff.index unknown pixel type {pixeltype}'
                )
            dtype, _ = pixel_types[pixeltype]
            if page.dtype != dtype:
                raise ValueError(
                    'NDTiff.index pixeltype does not match TIFF dtype '
                    f'{page.dtype} != {dtype}'
                )
        elif filename == self.filename:
            # use first page as keyframe
            pageindex = 0
            page = self.pages.first
            keyframes[filename] = page
        else:
            # read keyframe from file
            pageindex = 0
            with TiffFile(
                os.path.join(self.filehandle.dirname, filename)
            ) as tif:
                page = tif.pages.first
            keyframes[filename] = page

        # replace string with integer indices
        index: int | str
        if first:
            # the first entry determines which axes are categorical
            for axis, index in axes_dict.items():
                if isinstance(index, str):
                    categories[axis] = {index: 0}
                    axes_dict[axis] = 0
            first = False
        elif categories:
            for axis, values in categories.items():
                index = axes_dict[axis]
                assert isinstance(index, str)
                if index not in values:
                    values[index] = max(values.values()) + 1
                axes_dict[axis] = values[index]

        indices[tuple(axes_dict.values())] = page  # type: ignore
        dims = tuple(axes_dict.keys())

    if not indices:
        # nothing to build a series from; numpy.min would fail on empty input
        logger().warning(f'{self!r} NDTiff.index contains no entries')
        return None

    # indices may be negative or missing
    indices_array = numpy.array(list(indices.keys()), dtype=numpy.int32)
    min_index = numpy.min(indices_array, axis=0).tolist()
    max_index = numpy.max(indices_array, axis=0).tolist()
    shape = tuple(j - i + 1 for i, j in zip(min_index, max_index))

    # change axes to match storage order
    order = order_axes(indices_array, squeeze=False)
    shape = tuple(shape[i] for i in order)
    dims = tuple(dims[i] for i in order)
    indices = {
        tuple(key[i] - min_index[i] for i in order): value
        for key, value in indices.items()
    }

    # positions without an index entry are represented by None
    pages: list[TiffPage | TiffFrame | None] = [
        indices.get(idx) for idx in numpy.ndindex(shape)
    ]

    keyframe = next(iter(keyframes.values()))
    shape += keyframe.shape
    dims += keyframe.dims
    axes = ''.join(TIFF.AXES_CODES.get(i.lower(), 'Q') for i in dims)

    # TODO: support tiled axes and overlap
    # meta: Any = self.micromanager_metadata
    # if meta is None:
    #     meta = {}
    # elif 'Summary' in meta:
    #     meta = meta['Summary']
    # # map axes column->x, row->y
    # axestiled: dict[int, int] = {}
    # axesoverlap: dict[int, int] = {}
    # if 'column' in dims:
    #     key = dims.index('column')
    #     axestiled[key] = keyframe.axes.index('X')
    #     axesoverlap[key] = meta.get('GridPixelOverlapX', 0)
    # if 'row' in dims:
    #     key = dims.index('row')
    #     axestiled[key] = keyframe.axes.index('Y')
    #     axesoverlap[key] = meta.get('GridPixelOverlapY', 0)
    # if all(i == 0 for i in axesoverlap.values()):
    #     axesoverlap = {}

    self.is_uniform = True
    return [
        TiffPageSeries(
            pages,
            shape=shape,
            dtype=keyframe.dtype,
            axes=axes,
            # axestiled=axestiled,
            # axesoverlap=axesoverlap,
            # coords=coords,
            parent=self,
            kind='ndtiff',
            multifile=len(keyframes) > 1,
            squeeze=True,
        )
    ]
def _lsm_load_pages(self) -> None:
    """Read and fix all pages from LSM file.

    All pages are loaded into the page cache as frames, strip offsets and
    byte counts are corrected, and keyframes are assigned: pages at even
    indices use the first page, pages at odd indices use the second page.

    """
    # cache all pages to preserve corrected values
    pages = self.pages
    pages.cache = True
    pages.useframes = True
    # use first and second page as keyframes
    # (page 1 is set first; the current keyframe ends up being page 0)
    pages.set_keyframe(1)
    pages.set_keyframe(0)
    # load remaining pages as frames
    # (None: frames are created without a keyframe; assigned below)
    pages._load(None)
    # fix offsets and bytecounts first
    # TODO: fix multiple conversions between lists and tuples
    self._lsm_fix_strip_offsets()
    self._lsm_fix_strip_bytecounts()
    # assign keyframes for data and thumbnail series
    keyframe = self.pages.first
    for page in pages._pages[::2]:
        page.keyframe = keyframe  # type: ignore
    keyframe = cast(TiffPage, pages[1])
    for page in pages._pages[1::2]:
        page.keyframe = keyframe  # type: ignore
""" if self.filehandle.size < 2**32: return pages = self.pages npages = len(pages) series = self.series[0] axes = series.axes # find positions positions = 1 for i in 0, 1: if series.axes[i] in 'PM': positions *= series.shape[i] # make time axis first if positions > 1: ntimes = 0 for i in 1, 2: if axes[i] == 'T': ntimes = series.shape[i] break if ntimes: div, mod = divmod(npages, 2 * positions * ntimes) if mod != 0: raise RuntimeError('mod != 0') shape = (positions, ntimes, div, 2) indices = numpy.arange(product(shape)).reshape(shape) indices = numpy.moveaxis(indices, 1, 0) else: indices = numpy.arange(npages).reshape(-1, 2) else: indices = numpy.arange(npages).reshape(-1, 2) # images of reduced page might be stored first if pages[0].dataoffsets[0] > pages[1].dataoffsets[0]: indices = indices[..., ::-1] # unwrap offsets wrap = 0 previousoffset = 0 for npi in indices.flat: page = pages[int(npi)] dataoffsets = [] if all(i <= 0 for i in page.dataoffsets): logger().warning( f'{self!r} LSM file incompletely written at {page}' ) break for currentoffset in page.dataoffsets: if currentoffset < previousoffset: wrap += 2**32 dataoffsets.append(currentoffset + wrap) previousoffset = currentoffset page.dataoffsets = tuple(dataoffsets) def _lsm_fix_strip_bytecounts(self) -> None: """Set databytecounts to size of compressed data. The StripByteCounts tag in LSM files contains the number of bytes for the uncompressed data. 
""" if self.pages.first.compression == 1: return # sort pages by first strip offset pages = sorted(self.pages, key=lambda p: p.dataoffsets[0]) npages = len(pages) - 1 for i, page in enumerate(pages): if page.index % 2: continue offsets = page.dataoffsets bytecounts = page.databytecounts if i < npages: lastoffset = pages[i + 1].dataoffsets[0] else: # LZW compressed strips might be longer than uncompressed lastoffset = min( offsets[-1] + 2 * bytecounts[-1], self._fh.size ) bytecount_list = list(bytecounts) for j in range(len(bytecounts) - 1): bytecount_list[j] = offsets[j + 1] - offsets[j] bytecount_list[-1] = lastoffset - offsets[-1] page.databytecounts = tuple(bytecount_list) def _ndpi_load_pages(self) -> None: """Read and fix pages from NDPI slide file if CaptureMode > 6. If the value of the CaptureMode tag is greater than 6, change the attributes of TiffPage instances that are part of the pyramid to match 16-bit grayscale data. TiffTag values are not corrected. """ pages = self.pages capturemode = self.pages.first.tags.valueof(65441) if capturemode is None or capturemode < 6: return pages.cache = True pages.useframes = False pages._load() for page in pages: assert isinstance(page, TiffPage) mag = page.tags.valueof(65421) if mag is None or mag > 0: page.photometric = PHOTOMETRIC.MINISBLACK page.sampleformat = SAMPLEFORMAT.UINT page.samplesperpixel = 1 page.bitspersample = 16 page.dtype = page._dtype = numpy.dtype('uint16') if page.shaped[-1] > 1: page.axes = page.axes[:-1] page.shape = page.shape[:-1] page.shaped = page.shaped[:-1] + (1,) def __getattr__(self, name: str, /) -> bool: """Return `is_flag` attributes from first page.""" if name[3:] in TIFF.PAGE_FLAGS: if not self.pages: return False value = bool(getattr(self.pages.first, name)) setattr(self, name, value) return value raise AttributeError( f'{self.__class__.__name__!r} object has no attribute {name!r}' ) def __enter__(self) -> TiffFile: return self def __exit__(self, exc_type, exc_value, traceback) -> 
def __repr__(self) -> str:
    """Return short description of instance including the file name.

    The file name is passed through `snipstr` with a width of 32.
    The repr is interpolated into log messages of this class
    (``f'{self!r} ...'``), so it must not be empty.

    """
    return f'<tifffile.TiffFile {snipstr(self._fh.name, 32)!r}>'
@property
def is_appendable(self) -> bool:
    """Pages can be appended to file without corrupting.

    True unless the file is flagged as one of the formats listed below.

    """
    # TODO: check other formats
    not_appendable = (
        'ome',
        'lsm',
        'stk',
        'imagej',
        'fluoview',
        'micromanager',
    )
    # flags are evaluated lazily, in order, stopping at the first match
    return not any(getattr(self, f'is_{fmt}') for fmt in not_appendable)
@cached_property
def shaped_metadata(self) -> tuple[dict[str, Any], ...] | None:
    """Tifffile metadata from JSON formatted ImageDescription tags.

    Returns:
        None if the file is not flagged as shaped. Else a tuple with one
        metadata dict for each series of kind 'shaped' whose first page is
        a TiffPage with a shaped description. The tuple may be empty.

    """
    # is_shaped is a bool flag; test truthiness like the other
    # *_metadata properties instead of comparing to None
    if not self.is_shaped:
        return None
    result = []
    for s in self.series:
        if s.kind.lower() != 'shaped':
            continue
        page = s.pages[0]
        if (
            not isinstance(page, TiffPage)
            or page.shaped_description is None
        ):
            continue
        result.append(shaped_description_metadata(page.shaped_description))
    return tuple(result)
@cached_property
def imagej_metadata(self) -> dict[str, Any] | None:
    """ImageJ metadata from ImageDescription and IJMetadata tags.

    Returns:
        None if the file is not flagged as ImageJ or the first page has
        no ImageJ description. Else the parsed description, updated with
        the value of the IJMetadata tag if present.

    """
    if not self.is_imagej:
        return None
    page = self.pages.first
    if page.imagej_description is None:
        return None
    result = imagej_description_metadata(page.imagej_description)
    value = page.tags.valueof(50839)  # IJMetadata
    if value is not None:
        try:
            result.update(value)
        except Exception as exc:
            # best effort: keep the description metadata, report the failure
            logger().warning(
                f'{self!r} IJMetadata raised {exc!r:.128}'
            )
    return result
@cached_property
def mdgel_metadata(self) -> dict[str, Any] | None:
    """MD-GEL metadata from MDFileTag tags.

    The tags are taken from the first page if it contains tag 33445,
    else from the second page. None is returned if the file is not
    flagged as MD Gel or neither page contains tag 33445.

    """
    if not self.is_mdgel:
        return None

    tags = self.pages.first.tags
    if 33445 not in tags:
        # fall back to the second page
        tags = cast(TiffPage, self.pages[1]).tags
        if 33445 not in tags:
            return None

    # keys are the tag names without their first two characters
    return {
        TIFF.TAGS[code][2:]: tags.valueof(code)
        for code in range(33445, 33453)
        if code in tags
    }
@cached_property
def metaseries_metadata(self) -> dict[str, Any] | None:
    """MetaSeries metadata from ImageDescription tag of first page.

    None is returned if the file is not flagged as MetaSeries.

    """
    # TODO: remove this? It is a per page property
    if not self.is_metaseries:
        return None
    # only the description of the first page is parsed
    return metaseries_description_metadata(self.pages.first.description)
@final
class TiffFormat:
    """TIFF format properties.

    Instances are hashable; the hash is computed once from `version`,
    `byteorder`, and `offsetsize`.

    Parameters:
        version: Version of TIFF header.
        byteorder: Byteorder of TIFF header.
        offsetsize: Size of offsets.
        offsetformat: Struct format for offset values.
        tagnosize: Size of `tagnoformat`.
        tagnoformat: Struct format for number of TIFF tags.
        tagsize: Size of `tagformat1` and `tagformat2`.
        tagformat1: Struct format for code and dtype of TIFF tag.
        tagformat2: Struct format for count and value of TIFF tag.
        tagoffsetthreshold: Size of inline tag values.

    """

    __slots__ = (
        'version',
        'byteorder',
        'offsetsize',
        'offsetformat',
        'tagnosize',
        'tagnoformat',
        'tagsize',
        'tagformat1',
        'tagformat2',
        'tagoffsetthreshold',
        '_hash',
    )

    version: int
    """Version of TIFF header."""

    byteorder: Literal['>', '<']
    """Byteorder of TIFF header."""

    offsetsize: int
    """Size of offsets."""

    offsetformat: str
    """Struct format for offset values."""

    tagnosize: int
    """Size of `tagnoformat`."""

    tagnoformat: str
    """Struct format for number of TIFF tags."""

    tagsize: int
    """Size of `tagformat1` and `tagformat2`."""

    tagformat1: str
    """Struct format for code and dtype of TIFF tag."""

    tagformat2: str
    """Struct format for count and value of TIFF tag."""

    tagoffsetthreshold: int
    """Size of inline tag values."""

    _hash: int

    def __init__(
        self,
        version: int,
        byteorder: Literal['>', '<'],
        offsetsize: int,
        offsetformat: str,
        tagnosize: int,
        tagnoformat: str,
        tagsize: int,
        tagformat1: str,
        tagformat2: str,
        tagoffsetthreshold: int,
    ) -> None:
        self.version = version
        self.byteorder = byteorder
        self.offsetsize = offsetsize
        self.offsetformat = offsetformat
        self.tagnosize = tagnosize
        self.tagnoformat = tagnoformat
        self.tagsize = tagsize
        self.tagformat1 = tagformat1
        self.tagformat2 = tagformat2
        self.tagoffsetthreshold = tagoffsetthreshold
        # precomputed because instances are used as hashable keys
        self._hash = hash((version, byteorder, offsetsize))

    @property
    def is_bigtiff(self) -> bool:
        """Format is 64-bit BigTIFF."""
        return self.version == 43

    @property
    def is_ndpi(self) -> bool:
        """Format is 32-bit TIFF with 64-bit offsets used by NDPI."""
        return self.version == 42 and self.offsetsize == 8

    def __hash__(self) -> int:
        return self._hash

    def __repr__(self) -> str:
        """Return description of bitness, byte order, and NDPI offsets."""
        bits = '32' if self.version == 42 else '64'
        endian = 'little' if self.byteorder == '<' else 'big'
        ndpi = ' with 64-bit offsets' if self.is_ndpi else ''
        return f'<tifffile.TiffFormat {bits}-bit {endian}-endian{ndpi}>'

    def __str__(self) -> str:
        """Return repr followed by the value of every slot attribute."""
        return indent(
            repr(self),
            *(
                f'{attr}: {getattr(self, attr)!r}'
                for attr in TiffFormat.__slots__
            ),
        )
def __init__(
    self,
    arg: TiffFile | TiffPage | TiffFrame,
    /,
    *,
    index: Sequence[int] | int | None = None,
) -> None:
    """Initialize instance and read first page from file.

    If `arg` is a TiffFile, the offset to the first page is read from the
    current file position. Else, page offsets are taken from the `subifds`
    attribute of `arg`. On invalid or missing offsets, a warning is logged
    and the sequence stays empty.

    """
    offset: int

    self.parent = None
    self._pages = []  # cache of TiffPages, TiffFrames, or their offsets
    self._indexed = False  # True if offsets to all pages were read
    self._cached = False  # True if all pages were read into cache
    self._tiffpage = TiffPage  # class used for reading pages
    self._keyframe = None  # page that is currently used as keyframe
    self._cache = False  # do not cache frames or pages (if not keyframe)
    self._offset = 0
    self._nextpageoffset = None

    # normalize index to a tuple of int or None
    if index is None:
        self._index = None
    elif isinstance(index, (int, numpy.integer)):
        self._index = (int(index),)
    else:
        self._index = tuple(index)

    if isinstance(arg, TiffFile):
        # read offset to first page from current file position
        self.parent = arg
        fh = self.parent.filehandle
        # remember where the offset to the first page is stored
        self._nextpageoffset = fh.tell()
        offset = struct.unpack(
            self.parent.tiff.offsetformat,
            fh.read(self.parent.tiff.offsetsize),
        )[0]
        if offset == 0:
            logger().warning(f'{arg!r} contains no pages')
            self._indexed = True
            return
    elif arg.subifds is not None:
        # use offsets from SubIFDs tag
        offsets = arg.subifds
        self.parent = arg.parent
        fh = self.parent.filehandle
        if len(offsets) == 0 or offsets[0] == 0:
            logger().warning(f'{arg!r} contains invalid SubIFDs')
            self._indexed = True
            return
        offset = offsets[0]
    else:
        # page or frame without SubIFDs: empty sequence
        self._indexed = True
        return

    self._offset = offset
    if offset >= fh.size:
        logger().warning(
            f'{self!r} invalid offset to first page {offset!r}'
        )
        self._indexed = True
        return

    # index of first page in IFD tree
    pageindex: int | tuple[int, ...] = (
        0 if self._index is None else self._index + (0,)
    )

    # read and cache first page
    fh.seek(offset)
    page = TiffPage(self.parent, index=pageindex)
    self._pages.append(page)
    self._keyframe = page
    if self._nextpageoffset is None:
        # offsets from SubIFDs tag
        # (_nextpageoffset is only set in the TiffFile branch above)
        self._pages.extend(offsets[1:])
        self._indexed = True
        self._cached = True
""" if not isinstance(index, (int, numpy.integer)): raise TypeError(f'indices must be integers, not {type(index)}') index = int(index) if index < 0: index %= len(self) if self._keyframe is not None and self._keyframe.index == index: return if index == 0: self._keyframe = cast(TiffPage, self._pages[0]) return if self._indexed or index < len(self._pages): page = self._pages[index] if isinstance(page, TiffPage): self._keyframe = page return if isinstance(page, TiffFrame): # remove existing TiffFrame self._pages[index] = page.offset # load TiffPage from file tiffpage = self._tiffpage self._tiffpage = TiffPage try: self._keyframe = cast(TiffPage, self._getitem(index)) finally: self._tiffpage = tiffpage # always cache keyframes self._pages[index] = self._keyframe @property def next_page_offset(self) -> int | None: """Offset where offset to new page can be stored.""" if not self._indexed: self._seek(-1) return self._nextpageoffset def get( self, key: int, /, default: TiffPage | TiffFrame | None = None, *, validate: int = 0, cache: bool = False, aspage: bool = True, ) -> TiffPage | TiffFrame: """Return specified page from cache or file. The specified TiffPage or TiffFrame is read from file if it is not found in the cache. Parameters: key: Index of requested page in IFD chain. default: Page or frame to return if key is out of bounds. By default, an IndexError is raised if key is out of bounds. validate: If non-zero, raise RuntimeError if value does not match hash of TiffPage or TiffFrame. cache: Store returned page in cache for future use. aspage: Return TiffPage instance. 
""" try: return self._getitem( key, validate=validate, cache=cache, aspage=aspage ) except IndexError: if default is None: raise return default def _load(self, keyframe: TiffPage | bool | None = True, /) -> None: """Read all remaining pages from file.""" assert self.parent is not None if self._cached: return pages = self._pages if not pages: return if not self._indexed: self._seek(-1) if not self._cache: return fh = self.parent.filehandle if keyframe is not None: keyframe = self._keyframe for i, page in enumerate(pages): if isinstance(page, (int, numpy.integer)): pageindex: int | tuple[int, ...] = ( i if self._index is None else self._index + (i,) ) fh.seek(page) page = self._tiffpage( self.parent, index=pageindex, keyframe=keyframe ) pages[i] = page self._cached = True def _load_virtual_frames(self) -> None: """Calculate virtual TiffFrames.""" assert self.parent is not None pages = self._pages try: if len(pages) > 1: raise ValueError('pages already loaded') page = cast(TiffPage, pages[0]) if not page.is_contiguous: raise ValueError('data not contiguous') self._seek(4) # following pages are int delta = cast(int, pages[2]) - cast(int, pages[1]) if ( cast(int, pages[3]) - cast(int, pages[2]) != delta or cast(int, pages[4]) - cast(int, pages[3]) != delta ): raise ValueError('page offsets not equidistant') page1 = self._getitem(1, validate=page.hash) offsetoffset = page1.dataoffsets[0] - page1.offset if offsetoffset < 0 or offsetoffset > delta: raise ValueError('page offsets not equidistant') pages = [page, page1] filesize = self.parent.filehandle.size - delta for index, offset in enumerate( range(page1.offset + delta, filesize, delta) ): index += 2 d = index * delta dataoffsets = tuple(i + d for i in page.dataoffsets) offset_or_none = offset if offset < 2**31 - 1 else None pages.append( TiffFrame( page.parent, index=( index if self._index is None else self._index + (index,) ), offset=offset_or_none, dataoffsets=dataoffsets, databytecounts=page.databytecounts, 
keyframe=page, ) ) self._pages = pages self._cache = True self._cached = True self._indexed = True except Exception as exc: if self.parent.filehandle.size >= 2147483648: logger().warning( f'{self!r} <_load_virtual_frames> raised {exc!r:.128}' ) def _clear(self, fully: bool = True, /) -> None: """Delete all but first page from cache. Set keyframe to first page.""" pages = self._pages if not pages: return self._keyframe = cast(TiffPage, pages[0]) if fully: # delete all but first TiffPage/TiffFrame for i, page in enumerate(pages[1:]): if not isinstance(page, int) and page.offset is not None: pages[i + 1] = page.offset else: # delete only TiffFrames for i, page in enumerate(pages): if isinstance(page, TiffFrame) and page.offset is not None: pages[i] = page.offset self._cached = False def _seek(self, index: int, /) -> int: """Seek file to offset of page specified by index and return offset.""" assert self.parent is not None pages = self._pages lenpages = len(pages) if lenpages == 0: raise IndexError('index out of range') fh = self.parent.filehandle if fh.closed: raise ValueError('seek of closed file') if self._indexed or 0 <= index < lenpages: page = pages[index] offset = page if isinstance(page, int) else page.offset return fh.seek(offset) tiff = self.parent.tiff offsetformat = tiff.offsetformat offsetsize = tiff.offsetsize tagnoformat = tiff.tagnoformat tagnosize = tiff.tagnosize tagsize = tiff.tagsize unpack = struct.unpack page = pages[-1] offset = page if isinstance(page, int) else page.offset while lenpages < 2**32: # read offsets to pages from file until index is reached fh.seek(offset) # skip tags try: tagno = int(unpack(tagnoformat, fh.read(tagnosize))[0]) if tagno > 4096: raise TiffFileError(f'suspicious number of tags {tagno}') except Exception as exc: logger().error( f'{self!r} corrupted tag list of page ' f'{lenpages} @{offset} raised {exc!r:.128}', ) del pages[-1] lenpages -= 1 self._indexed = True break self._nextpageoffset = offset + tagnosize + tagno * 
tagsize fh.seek(self._nextpageoffset) # read offset to next page try: offset = int(unpack(offsetformat, fh.read(offsetsize))[0]) except Exception as exc: logger().error( f'{self!r} invalid offset to page ' f'{lenpages + 1} @{self._nextpageoffset} ' f'raised {exc!r:.128}' ) self._indexed = True break if offset == 0: self._indexed = True break if offset >= fh.size: logger().error(f'{self!r} invalid page offset {offset!r}') self._indexed = True break pages.append(offset) lenpages += 1 if 0 <= index < lenpages: break # detect some circular references if lenpages == 100: for i, p in enumerate(pages[:-1]): if offset == (p if isinstance(p, int) else p.offset): index = i self._pages = pages[: i + 1] self._indexed = True logger().error( f'{self!r} invalid circular reference to IFD ' f'{i} at {offset=}' ) break if index >= lenpages: raise IndexError('index out of range') page = pages[index] return fh.seek(page if isinstance(page, int) else page.offset) def _getlist( self, key: int | slice | Iterable[int] | None = None, /, useframes: bool = True, validate: bool = True, ) -> list[TiffPage | TiffFrame]: """Return specified pages as list of TiffPages or TiffFrames. The first item is a TiffPage, and is used as a keyframe for following TiffFrames. 
""" getitem = self._getitem _useframes = self.useframes if key is None: key = iter(range(len(self))) elif isinstance(key, (int, numpy.integer)): # return single TiffPage key = int(key) self.useframes = False if key == 0: return [self.first] try: return [getitem(key)] finally: self.useframes = _useframes elif isinstance(key, slice): start, stop, _ = key.indices(2**31 - 1) if not self._indexed and max(stop, start) > len(self._pages): self._seek(-1) key = iter(range(*key.indices(len(self._pages)))) elif isinstance(key, collections.abc.Iterable): key = iter(key) else: raise TypeError( f'key must be an integer, slice, or iterable, not {type(key)}' ) # use first page as keyframe assert self._keyframe is not None keyframe = self._keyframe self.set_keyframe(next(key)) validhash = self._keyframe.hash if validate else 0 if useframes: self.useframes = True try: pages = [getitem(i, validate=validhash) for i in key] pages.insert(0, self._keyframe) finally: # restore state self._keyframe = keyframe if useframes: self.useframes = _useframes return pages def _getitem( self, key: int, /, *, validate: int = 0, # hash cache: bool = False, aspage: bool = False, ) -> TiffPage | TiffFrame: """Return specified page from cache or file.""" assert self.parent is not None key = int(key) pages = self._pages if key < 0: key %= len(self) elif self._indexed and key >= len(pages): raise IndexError(f'index {key} out of range({len(pages)})') tiffpage = TiffPage if aspage else self._tiffpage if key < len(pages): page = pages[key] if self._cache and not aspage: if not isinstance(page, (int, numpy.integer)): if validate and validate != page.hash: raise RuntimeError('page hash mismatch') return page elif isinstance(page, (TiffPage, tiffpage)): # page is not an int if validate and validate != page.hash: # type: ignore raise RuntimeError('page hash mismatch') return page # type: ignore pageindex: int | tuple[int, ...] 
= ( key if self._index is None else self._index + (key,) ) self._seek(key) page = tiffpage(self.parent, index=pageindex, keyframe=self._keyframe) assert isinstance(page, (TiffPage, TiffFrame)) if validate and validate != page.hash: raise RuntimeError('page hash mismatch') if self._cache or cache: pages[key] = page return page @overload def __getitem__(self, key: int, /) -> TiffPage | TiffFrame: ... @overload def __getitem__( self, key: slice | Iterable[int], / ) -> list[TiffPage | TiffFrame]: ... def __getitem__( self, key: int | slice | Iterable[int], / ) -> TiffPage | TiffFrame | list[TiffPage | TiffFrame]: pages = self._pages getitem = self._getitem if isinstance(key, (int, numpy.integer)): key = int(key) if key == 0: return cast(TiffPage, pages[key]) return getitem(key) if isinstance(key, slice): start, stop, _ = key.indices(2**31 - 1) if not self._indexed and max(stop, start) > len(pages): self._seek(-1) return [getitem(i) for i in range(*key.indices(len(pages)))] if isinstance(key, collections.abc.Iterable): return [getitem(k) for k in key] raise TypeError('key must be an integer, slice, or iterable') def __iter__(self) -> Iterator[TiffPage | TiffFrame]: i = 0 while True: try: yield self._getitem(i) i += 1 except IndexError: break if self._cache: self._cached = True def __bool__(self) -> bool: """Return True if file contains any pages.""" return len(self._pages) > 0 def __len__(self) -> int: """Return number of pages in file.""" if not self._indexed: self._seek(-1) return len(self._pages) def __repr__(self) -> str: return f'' @final class TiffPage: """TIFF image file directory (IFD). TiffPage instances are not thread-safe. All attributes are read-only. Parameters: parent: TiffFile instance to read page from. The file handle position must be at an offset to an IFD structure. index: Index of page in IFD tree. keyframe: Not used. Raises: TiffFileError: Invalid TIFF structure. 
""" # instance attributes tags: TiffTags """Tags belonging to page.""" parent: TiffFile """TiffFile instance page belongs to.""" offset: int """Position of page in file.""" shape: tuple[int, ...] """Shape of image array in page.""" dtype: numpy.dtype[Any] | None """Data type of image array in page.""" shaped: tuple[int, int, int, int, int] """Normalized 5-dimensional shape of image array in page: 0. separate samplesperpixel or 1. 1. imagedepth or 1. 2. imagelength. 3. imagewidth. 4. contig samplesperpixel or 1. """ axes: str """Character codes for dimensions in image array: 'S' sample, 'X' width, 'Y' length, 'Z' depth. """ dataoffsets: tuple[int, ...] """Positions of strips or tiles in file.""" databytecounts: tuple[int, ...] """Size of strips or tiles in file.""" _dtype: numpy.dtype[Any] | None _index: tuple[int, ...] # index of page in IFD tree # default properties; might be updated from tags subfiletype: int = 0 """:py:class:`FILETYPE` kind of image.""" imagewidth: int = 0 """Number of columns (pixels per row) in image.""" imagelength: int = 0 """Number of rows in image.""" imagedepth: int = 1 """Number of Z slices in image.""" tilewidth: int = 0 """Number of columns in each tile.""" tilelength: int = 0 """Number of rows in each tile.""" tiledepth: int = 1 """Number of Z slices in each tile.""" samplesperpixel: int = 1 """Number of components per pixel.""" bitspersample: int = 1 """Number of bits per pixel component.""" sampleformat: int = 1 """:py:class:`SAMPLEFORMAT` type of pixel components.""" rowsperstrip: int = 2**32 - 1 """Number of rows per strip.""" compression: int = 1 """:py:class:`COMPRESSION` scheme used on image data.""" planarconfig: int = 1 """:py:class:`PLANARCONFIG` type of storage of components in pixel.""" fillorder: int = 1 """Logical order of bits within byte of image data.""" photometric: int = 0 """:py:class:`PHOTOMETRIC` color space of image.""" predictor: int = 1 """:py:class:`PREDICTOR` applied to image data before compression.""" 
extrasamples: tuple[int, ...] = () """:py:class:`EXTRASAMPLE` interpretation of extra components in pixel.""" subsampling: tuple[int, int] | None = None """Subsampling factors used for chrominance components.""" subifds: tuple[int, ...] | None = None """Positions of SubIFDs in file.""" jpegtables: bytes | None = None """JPEG quantization and Huffman tables.""" jpegheader: bytes | None = None """JPEG header for NDPI.""" software: str = '' """Software used to create image.""" description: str = '' """Subject of image.""" description1: str = '' """Value of second ImageDescription tag.""" nodata: int | float = 0 """Value used for missing data.""" def __init__( self, parent: TiffFile, /, index: int | Sequence[int], *, keyframe: TiffPage | None = None, ) -> None: tag: TiffTag | None tiff = parent.tiff self.parent = parent self.shape = () self.shaped = (0, 0, 0, 0, 0) self.dtype = self._dtype = None self.axes = '' self.tags = tags = TiffTags() self.dataoffsets = () self.databytecounts = () if isinstance(index, int): self._index = (index,) else: self._index = tuple(index) # read IFD structure and its tags from file fh = parent.filehandle self.offset = fh.tell() # offset to this IFD try: tagno: int = struct.unpack( tiff.tagnoformat, fh.read(tiff.tagnosize) )[0] if tagno > 4096: raise ValueError(f'suspicious number of tags {tagno}') except Exception as exc: raise TiffFileError(f'corrupted tag list @{self.offset}') from exc tagoffset = self.offset + tiff.tagnosize # fh.tell() tagsize = tagsize_ = tiff.tagsize data = fh.read(tagsize * tagno) if len(data) != tagsize * tagno: raise TiffFileError('corrupted IFD structure') if tiff.is_ndpi: # patch offsets/values for 64-bit NDPI file tagsize = 16 fh.seek(8, os.SEEK_CUR) ext = fh.read(4 * tagno) # high bits data = b''.join( data[i * 12 : i * 12 + 12] + ext[i * 4 : i * 4 + 4] for i in range(tagno) ) tagindex = -tagsize for i in range(tagno): tagindex += tagsize tagdata = data[tagindex : tagindex + tagsize] try: tag = 
TiffTag.fromfile( parent, offset=tagoffset + i * tagsize_, header=tagdata ) except TiffFileError as exc: logger().error(f' raised {exc!r:.128}') continue tags.add(tag) if not tags: return # found in FIBICS for code, name in TIFF.TAG_ATTRIBUTES.items(): value = tags.valueof(code) if value is None: continue if code in {270, 305} and not isinstance(value, str): # wrong string type for software or description continue setattr(self, name, value) value = tags.valueof(270, index=1) if isinstance(value, str): self.description1 = value if self.subfiletype == 0: value = tags.valueof(255) # SubfileType if value == 2: self.subfiletype = 0b1 # reduced image elif value == 3: self.subfiletype = 0b10 # multi-page elif not isinstance(self.subfiletype, int): # files created by IDEAS logger().warning(f'{self!r} invalid {self.subfiletype=}') self.subfiletype = 0 # consolidate private tags; remove them from self.tags # if self.is_andor: # self.andor_tags # elif self.is_epics: # self.epics_tags # elif self.is_ndpi: # self.ndpi_tags # if self.is_sis and 34853 in tags: # # TODO: cannot change tag.name # tags[34853].name = 'OlympusSIS2' # dataoffsets and databytecounts # TileOffsets self.dataoffsets = tags.valueof(324) if self.dataoffsets is None: # StripOffsets self.dataoffsets = tags.valueof(273) if self.dataoffsets is None: # JPEGInterchangeFormat et al. self.dataoffsets = tags.valueof(513) if self.dataoffsets is None: self.dataoffsets = () logger().error(f'{self!r} missing data offset tag') # TileByteCounts self.databytecounts = tags.valueof(325) if self.databytecounts is None: # StripByteCounts self.databytecounts = tags.valueof(279) if self.databytecounts is None: # JPEGInterchangeFormatLength et al. 
self.databytecounts = tags.valueof(514) if ( self.imagewidth == 0 and self.imagelength == 0 and self.dataoffsets and self.databytecounts ): # dimensions may be missing in some RAW formats # read dimensions from assumed JPEG encoded segment try: fh.seek(self.dataoffsets[0]) ( precision, imagelength, imagewidth, samplesperpixel, ) = jpeg_shape(fh.read(min(self.databytecounts[0], 4096))) except Exception: pass else: self.imagelength = imagelength self.imagewidth = imagewidth self.samplesperpixel = samplesperpixel if 258 not in tags: self.bitspersample = 8 if precision <= 8 else 16 if 262 not in tags and samplesperpixel == 3: self.photometric = PHOTOMETRIC.YCBCR if 259 not in tags: self.compression = COMPRESSION.OJPEG if 278 not in tags: self.rowsperstrip = imagelength elif self.compression == 6: # OJPEG hack. See libtiff v4.2.0 tif_dirread.c#L4082 if 262 not in tags: # PhotometricInterpretation missing self.photometric = PHOTOMETRIC.YCBCR elif self.photometric == 2: # RGB -> YCbCr self.photometric = PHOTOMETRIC.YCBCR if 258 not in tags: # BitsPerSample missing self.bitspersample = 8 if 277 not in tags: # SamplesPerPixel missing if self.photometric in {2, 6}: self.samplesperpixel = 3 elif self.photometric in {0, 1}: self.samplesperpixel = 3 elif self.is_lsm or (self.index != 0 and self.parent.is_lsm): # correct non standard LSM bitspersample tags tags[258]._fix_lsm_bitspersample() if self.compression == 1 and self.predictor != 1: # work around bug in LSM510 software self.predictor = PREDICTOR.NONE elif self.is_vista or (self.index != 0 and self.parent.is_vista): # ISS Vista writes wrong ImageDepth tag self.imagedepth = 1 elif self.is_stk: # read UIC1tag again now that plane count is known tag = tags.get(33628) # UIC1tag assert tag is not None fh.seek(tag.valueoffset) uic2tag = tags.get(33629) # UIC2tag try: tag.value = read_uic1tag( fh, tiff.byteorder, tag.dtype, tag.count, 0, planecount=uic2tag.count if uic2tag is not None else 1, ) except Exception as exc: 
logger().warning( f'{self!r} raised {exc!r:.128}' ) tag = tags.get(50839) if tag is not None: # decode IJMetadata tag try: tag.value = imagej_metadata( tag.value, tags[50838].value, # IJMetadataByteCounts tiff.byteorder, ) except Exception as exc: logger().warning( f'{self!r} raised {exc!r:.128}' ) # BitsPerSample value = tags.valueof(258) if value is not None: if self.bitspersample != 1: pass # bitspersample was set by ojpeg hack elif tags[258].count == 1: self.bitspersample = int(value) else: # LSM might list more items than samplesperpixel value = value[: self.samplesperpixel] if any(v - value[0] for v in value): self.bitspersample = value else: self.bitspersample = int(value[0]) # SampleFormat value = tags.valueof(339) if value is not None: if tags[339].count == 1: try: self.sampleformat = SAMPLEFORMAT(value) except ValueError: self.sampleformat = int(value) else: value = value[: self.samplesperpixel] if any(v - value[0] for v in value): try: self.sampleformat = SAMPLEFORMAT(value) except ValueError: self.sampleformat = int(value) else: try: self.sampleformat = SAMPLEFORMAT(value[0]) except ValueError: self.sampleformat = int(value[0]) elif self.bitspersample == 32 and ( self.is_indica or (self.index != 0 and self.parent.is_indica) ): # IndicaLabsImageWriter does not write SampleFormat tag self.sampleformat = SAMPLEFORMAT.IEEEFP if 322 in tags: # TileWidth self.rowsperstrip = 0 elif 257 in tags: # ImageLength if 278 not in tags or tags[278].count > 1: # RowsPerStrip self.rowsperstrip = self.imagelength self.rowsperstrip = min(self.rowsperstrip, self.imagelength) # self.stripsperimage = int(math.floor( # float(self.imagelength + self.rowsperstrip - 1) / # self.rowsperstrip)) # determine dtype dtypestr = TIFF.SAMPLE_DTYPES.get( (self.sampleformat, self.bitspersample), None ) if dtypestr is not None: dtype = numpy.dtype(dtypestr) else: dtype = None self.dtype = self._dtype = dtype # determine shape of data imagelength = self.imagelength imagewidth = 
self.imagewidth imagedepth = self.imagedepth samplesperpixel = self.samplesperpixel if self.photometric == 2 or samplesperpixel > 1: # PHOTOMETRIC.RGB if self.planarconfig == 1: self.shaped = ( 1, imagedepth, imagelength, imagewidth, samplesperpixel, ) if imagedepth == 1: self.shape = (imagelength, imagewidth, samplesperpixel) self.axes = 'YXS' else: self.shape = ( imagedepth, imagelength, imagewidth, samplesperpixel, ) self.axes = 'ZYXS' else: self.shaped = ( samplesperpixel, imagedepth, imagelength, imagewidth, 1, ) if imagedepth == 1: self.shape = (samplesperpixel, imagelength, imagewidth) self.axes = 'SYX' else: self.shape = ( samplesperpixel, imagedepth, imagelength, imagewidth, ) self.axes = 'SZYX' else: self.shaped = (1, imagedepth, imagelength, imagewidth, 1) if imagedepth == 1: self.shape = (imagelength, imagewidth) self.axes = 'YX' else: self.shape = (imagedepth, imagelength, imagewidth) self.axes = 'ZYX' if not self.databytecounts: self.databytecounts = ( product(self.shape) * (self.bitspersample // 8), ) if self.compression != 1: logger().error(f'{self!r} missing ByteCounts tag') if imagelength and self.rowsperstrip and not self.is_lsm: # fix incorrect number of strip bytecounts and offsets maxstrips = ( int( math.floor(imagelength + self.rowsperstrip - 1) / self.rowsperstrip ) * self.imagedepth ) if self.planarconfig == 2: maxstrips *= self.samplesperpixel if maxstrips != len(self.databytecounts): logger().error( f'{self!r} incorrect StripByteCounts count ' f'({len(self.databytecounts)} != {maxstrips})' ) self.databytecounts = self.databytecounts[:maxstrips] if maxstrips != len(self.dataoffsets): logger().error( f'{self!r} incorrect StripOffsets count ' f'({len(self.dataoffsets)} != {maxstrips})' ) self.dataoffsets = self.dataoffsets[:maxstrips] value = tags.valueof(42113) # GDAL_NODATA if value is not None and dtype is not None: try: pytype = type(dtype.type(0).item()) value = value.replace(',', '.') # comma decimal separator self.nodata = 
pytype(value) except Exception: pass mcustarts = tags.valueof(65426) if mcustarts is not None and self.is_ndpi: # use NDPI JPEG McuStarts as tile offsets mcustarts = mcustarts.astype('int64') high = tags.valueof(65432) if high is not None: # McuStartsHighBytes high = high.astype('uint64') high <<= 32 mcustarts += high.astype('int64') fh.seek(self.dataoffsets[0]) jpegheader = fh.read(mcustarts[0]) try: ( self.tilelength, self.tilewidth, self.jpegheader, ) = ndpi_jpeg_tile(jpegheader) except ValueError as exc: logger().warning( f'{self!r} raised {exc!r:.128}' ) else: # TODO: optimize tuple(ndarray.tolist()) databytecounts = numpy.diff( mcustarts, append=self.databytecounts[0] ) self.databytecounts = tuple(databytecounts.tolist()) mcustarts += self.dataoffsets[0] self.dataoffsets = tuple(mcustarts.tolist()) @cached_property def decode( self, ) -> Callable[ ..., tuple[ NDArray[Any] | None, tuple[int, int, int, int, int], tuple[int, int, int, int], ], ]: """Return decoded segment, its shape, and indices in image. The decode function is implemented as a closure and has the following signature: Parameters: data (Union[bytes, None]): Encoded bytes of segment (strip or tile) or None for empty segments. index (int): Index of segment in Offsets and Bytecount tag values. jpegtables (Optional[bytes]): For JPEG compressed segments only, value of JPEGTables tag if any. Returns: - Decoded segment or None for empty segments. - Position of segment in image array of normalized shape (separate sample, depth, length, width, contig sample). - Shape of segment (depth, length, width, contig samples). The shape of strips depends on their linear index. Raises: ValueError or NotImplementedError: Decoding is not supported. TiffFileError: Invalid TIFF structure. 
""" if self.hash in self.parent._parent._decoders: return self.parent._parent._decoders[self.hash] def cache(decode, /): self.parent._parent._decoders[self.hash] = decode return decode if self.dtype is None or self._dtype is None: def decode_raise_dtype(*args, **kwargs): raise ValueError( 'data type not supported ' f'(SampleFormat {self.sampleformat}, ' f'{self.bitspersample}-bit)' ) return cache(decode_raise_dtype) if 0 in self.shaped: def decode_raise_empty(*args, **kwargs): raise ValueError('empty image') return cache(decode_raise_empty) try: if self.compression == 1: decompress = None else: decompress = TIFF.DECOMPRESSORS[self.compression] if ( self.compression in {65000, 65001, 65002} and not self.parent.is_eer ): raise KeyError(self.compression) except KeyError as exc: def decode_raise_compression(*args, exc=str(exc)[1:-1], **kwargs): raise ValueError(f'{exc}') return cache(decode_raise_compression) try: if self.predictor == 1: unpredict = None else: unpredict = TIFF.UNPREDICTORS[self.predictor] except KeyError as exc: if self.compression in TIFF.IMAGE_COMPRESSIONS: logger().warning( f'{self!r} ignoring predictor {self.predictor}' ) unpredict = None else: def decode_raise_predictor( *args, exc=str(exc)[1:-1], **kwargs ): raise ValueError(f'{exc}') return cache(decode_raise_predictor) if self.tags.get(339) is not None: tag = self.tags[339] # SampleFormat if tag.count != 1 and any(i - tag.value[0] for i in tag.value): def decode_raise_sampleformat(*args, **kwargs): raise ValueError( f'sample formats do not match {tag.value}' ) return cache(decode_raise_sampleformat) if self.is_subsampled and ( self.compression not in {6, 7, 34892, 33007} or self.planarconfig == 2 ): def decode_raise_subsampling(*args, **kwargs): raise NotImplementedError( 'chroma subsampling not supported without JPEG compression' ) return cache(decode_raise_subsampling) if self.compression == 50001 and self.samplesperpixel == 4: # WebP segments may be missing all-opaque alpha channel def 
decompress_webp_rgba(data, out=None): return imagecodecs.webp_decode(data, hasalpha=True, out=out) decompress = decompress_webp_rgba # normalize segments shape to [depth, length, width, contig] if self.is_tiled: stshape = ( self.tiledepth, self.tilelength, self.tilewidth, self.samplesperpixel if self.planarconfig == 1 else 1, ) else: stshape = ( 1, self.rowsperstrip, self.imagewidth, self.samplesperpixel if self.planarconfig == 1 else 1, ) stdepth, stlength, stwidth, samples = stshape _, imdepth, imlength, imwidth, samples = self.shaped if self.is_tiled: width = (imwidth + stwidth - 1) // stwidth length = (imlength + stlength - 1) // stlength depth = (imdepth + stdepth - 1) // stdepth def indices( segmentindex: int, / ) -> tuple[ tuple[int, int, int, int, int], tuple[int, int, int, int] ]: # return indices and shape of tile in image array return ( ( segmentindex // (width * length * depth), (segmentindex // (width * length)) % depth * stdepth, (segmentindex // width) % length * stlength, segmentindex % width * stwidth, 0, ), stshape, ) def reshape( data: NDArray[Any], indices: tuple[int, int, int, int, int], shape: tuple[int, int, int, int], /, ) -> NDArray[Any]: # return reshaped tile or raise TiffFileError size = shape[0] * shape[1] * shape[2] * shape[3] if data.ndim == 1 and data.size > size: # decompression / unpacking might return too many bytes data = data[:size] if data.size == size: # complete tile # data might be non-contiguous; cannot reshape inplace return data.reshape(shape) try: # data fills remaining space # found in JPEG/PNG compressed tiles return data.reshape( ( min(imdepth - indices[1], shape[0]), min(imlength - indices[2], shape[1]), min(imwidth - indices[3], shape[2]), samples, ) ) except ValueError: pass try: # data fills remaining horizontal space # found in tiled GeoTIFF return data.reshape( ( min(imdepth - indices[1], shape[0]), min(imlength - indices[2], shape[1]), shape[2], samples, ) ) except ValueError: pass raise TiffFileError( 
f'corrupted tile @ {indices} cannot be reshaped from ' f'{data.shape} to {shape}' ) def pad( data: NDArray[Any], shape: tuple[int, int, int, int], / ) -> tuple[NDArray[Any], tuple[int, int, int, int]]: # pad tile to shape if data.shape == shape: return data, shape padwidth = [(0, i - j) for i, j in zip(shape, data.shape)] data = numpy.pad(data, padwidth, constant_values=self.nodata) return data, shape def pad_none( shape: tuple[int, int, int, int], / ) -> tuple[int, int, int, int]: # return shape of tile return shape else: # strips length = (imlength + stlength - 1) // stlength def indices( segmentindex: int, / ) -> tuple[ tuple[int, int, int, int, int], tuple[int, int, int, int] ]: # return indices and shape of strip in image array indices = ( segmentindex // (length * imdepth), (segmentindex // length) % imdepth * stdepth, segmentindex % length * stlength, 0, 0, ) shape = ( stdepth, min(stlength, imlength - indices[2]), stwidth, samples, ) return indices, shape def reshape( data: NDArray[Any], indices: tuple[int, int, int, int, int], shape: tuple[int, int, int, int], /, ) -> NDArray[Any]: # return reshaped strip or raise TiffFileError size = shape[0] * shape[1] * shape[2] * shape[3] if data.ndim == 1 and data.size > size: # decompression / unpacking might return too many bytes data = data[:size] if data.size == size: # expected size try: data.shape = shape except AttributeError: # incompatible shape for in-place modification # decoder returned non-contiguous array data = data.reshape(shape) return data datashape = data.shape try: # too many rows? 
data.shape = shape[0], -1, shape[2], shape[3] data = data[:, : shape[1]] data.shape = shape return data except ValueError: pass raise TiffFileError( 'corrupted strip cannot be reshaped from ' f'{datashape} to {shape}' ) def pad( data: NDArray[Any], shape: tuple[int, int, int, int], / ) -> tuple[NDArray[Any], tuple[int, int, int, int]]: # pad strip length to rowsperstrip shape = (shape[0], stlength, shape[2], shape[3]) if data.shape == shape: return data, shape padwidth = [ (0, 0), (0, stlength - data.shape[1]), (0, 0), (0, 0), ] data = numpy.pad(data, padwidth, constant_values=self.nodata) return data, shape def pad_none( shape: tuple[int, int, int, int], / ) -> tuple[int, int, int, int]: # return shape of strip return (shape[0], stlength, shape[2], shape[3]) if self.compression in {6, 7, 34892, 33007}: # JPEG needs special handling if self.fillorder == 2: logger().debug(f'{self!r} disabling LSB2MSB for JPEG') if unpredict: logger().debug(f'{self!r} disabling predictor for JPEG') if 28672 in self.tags: # SonyRawFileType logger().warning( f'{self!r} SonyRawFileType might need additional ' 'unpacking (see issue #95)' ) colorspace, outcolorspace = jpeg_decode_colorspace( self.photometric, self.planarconfig, self.extrasamples, self.is_jfif, ) def decode_jpeg( data: bytes | None, index: int, /, *, jpegtables: bytes | None = None, jpegheader: bytes | None = None, _fullsize: bool = False, ) -> tuple[ NDArray[Any] | None, tuple[int, int, int, int, int], tuple[int, int, int, int], ]: # return decoded segment, its shape, and indices in image segmentindex, shape = indices(index) if data is None: if _fullsize: shape = pad_none(shape) return data, segmentindex, shape data_array: NDArray[Any] = imagecodecs.jpeg_decode( data, bitspersample=self.bitspersample, tables=jpegtables, header=jpegheader, colorspace=colorspace, outcolorspace=outcolorspace, shape=shape[1:3], ) data_array = reshape(data_array, segmentindex, shape) if _fullsize: data_array, shape = pad(data_array, shape) 
return data_array, segmentindex, shape return cache(decode_jpeg) if self.compression in {65000, 65001, 65002}: # EER decoder requires shape and extra args if self.compression == 65002: rlebits = int(self.tags.valueof(65007, 7)) horzbits = int(self.tags.valueof(65008, 2)) vertbits = int(self.tags.valueof(65009, 2)) elif self.compression == 65001: rlebits = 7 horzbits = 2 vertbits = 2 else: rlebits = 8 horzbits = 2 vertbits = 2 def decode_eer( data: bytes | None, index: int, /, *, jpegtables: bytes | None = None, jpegheader: bytes | None = None, _fullsize: bool = False, ) -> tuple[ NDArray[Any] | None, tuple[int, int, int, int, int], tuple[int, int, int, int], ]: # return decoded eer segment, its shape, and indices in image segmentindex, shape = indices(index) if data is None: if _fullsize: shape = pad_none(shape) return data, segmentindex, shape data_array = decompress( data, shape=shape[1:3], rlebits=rlebits, horzbits=horzbits, vertbits=vertbits, superres=False, ) # type: ignore return data_array.reshape(shape), segmentindex, shape return cache(decode_eer) if self.compression == 48124: # Jetraw requires pre-allocated output buffer def decode_jetraw( data: bytes | None, index: int, /, *, jpegtables: bytes | None = None, jpegheader: bytes | None = None, _fullsize: bool = False, ) -> tuple[ NDArray[Any] | None, tuple[int, int, int, int, int], tuple[int, int, int, int], ]: # return decoded segment, its shape, and indices in image segmentindex, shape = indices(index) if data is None: if _fullsize: shape = pad_none(shape) return data, segmentindex, shape data_array = numpy.zeros(shape, numpy.uint16) decompress(data, out=data_array) # type: ignore return data_array.reshape(shape), segmentindex, shape return cache(decode_jetraw) if self.compression in TIFF.IMAGE_COMPRESSIONS: # presume codecs always return correct dtype, native byte order... 
if self.fillorder == 2: logger().debug( f'{self!r} ' f'disabling LSB2MSB for compression {self.compression}' ) if unpredict: logger().debug( f'{self!r} ' f'disabling predictor for compression {self.compression}' ) def decode_image( data: bytes | None, index: int, /, *, jpegtables: bytes | None = None, jpegheader: bytes | None = None, _fullsize: bool = False, ) -> tuple[ NDArray[Any] | None, tuple[int, int, int, int, int], tuple[int, int, int, int], ]: # return decoded segment, its shape, and indices in image segmentindex, shape = indices(index) if data is None: if _fullsize: shape = pad_none(shape) return data, segmentindex, shape data_array: NDArray[Any] data_array = decompress(data) # type: ignore # del data data_array = reshape(data_array, segmentindex, shape) if _fullsize: data_array, shape = pad(data_array, shape) return data_array, segmentindex, shape return cache(decode_image) dtype = numpy.dtype(self.parent.byteorder + self._dtype.char) if self.sampleformat == 5: # complex integer if unpredict is not None: raise NotImplementedError( 'unpredicting complex integers not supported' ) itype = numpy.dtype( f'{self.parent.byteorder}i{self.bitspersample // 16}' ) ftype = numpy.dtype( f'{self.parent.byteorder}f{dtype.itemsize // 2}' ) def unpack(data: bytes, /) -> NDArray[Any]: # return complex integer as numpy.complex return numpy.frombuffer(data, itype).astype(ftype).view(dtype) elif self.bitspersample in {8, 16, 32, 64, 128}: # regular data types if (self.bitspersample * stwidth * samples) % 8: raise ValueError('data and sample size mismatch') if self.predictor in {3, 34894, 34895}: # PREDICTOR.FLOATINGPOINT # floating-point horizontal differencing decoder needs # raw byte order dtype = numpy.dtype(self._dtype.char) def unpack(data: bytes, /) -> NDArray[Any]: # return numpy array from buffer try: # read only numpy array return numpy.frombuffer(data, dtype) except ValueError: # for example, LZW strips may be missing EOI bps = self.bitspersample // 8 size = 
def segments(
    self,
    *,
    lock: threading.RLock | NullContext | None = None,
    maxworkers: int | None = None,
    func: Callable[..., Any] | None = None,  # TODO: type this
    sort: bool = False,
    buffersize: int | None = None,
    _fullsize: bool | None = None,
) -> Iterator[
    tuple[
        NDArray[Any] | None,
        tuple[int, int, int, int, int],
        tuple[int, int, int, int],
    ]
]:
    """Return iterator over decoded tiles or strips.

    Parameters:
        lock:
            Reentrant lock to synchronize file seeks and reads.
            The default is the lock of the parent's file handle.
        maxworkers:
            Maximum number of threads to concurrently decode segments.
            If *None* or less than 1, the keyframe's ``maxworkers`` is used.
        func:
            Function to process decoded segment.
            If given, its return value is yielded instead of the decoded
            segment tuple.
        sort:
            Read segments from file in order of their offsets.
        buffersize:
            Approximate number of bytes to read from file in one pass.
            The default is :py:attr:`_TIFF.BUFFERSIZE`.
        _fullsize:
            Internal use. The default is whether the keyframe is tiled.

    Yields:
        - Decoded segment or None for empty segments.
        - Position of segment in image array of normalized shape
          (separate sample, depth, length, width, contig sample).
        - Shape of segment (depth, length, width, contig samples).
          The shape of strips depends on their linear index.

    """
    keyframe = self.keyframe  # self or keyframe
    fh = self.parent.filehandle
    if lock is None:
        lock = fh.lock
    if _fullsize is None:
        _fullsize = keyframe.is_tiled

    decodeargs: dict[str, Any] = {'_fullsize': bool(_fullsize)}
    if keyframe.compression in {6, 7, 34892, 33007}:  # JPEG
        decodeargs['jpegtables'] = self.jpegtables
        decodeargs['jpegheader'] = keyframe.jpegheader

    # bind decodeargs and the decode function as defaults so the worker
    # does not look them up again for every segment
    if func is None:

        def decode(args, decodeargs=decodeargs, decode=keyframe.decode):
            return decode(*args, **decodeargs)

    else:

        def decode(args, decodeargs=decodeargs, decode=keyframe.decode):
            return func(decode(*args, **decodeargs))  # type: ignore

    if maxworkers is None or maxworkers < 1:
        maxworkers = keyframe.maxworkers
    if maxworkers < 2:
        # decode in the calling thread, one segment at a time
        for segment in fh.read_segments(
            self.dataoffsets,
            self.databytecounts,
            lock=lock,
            sort=sort,
            buffersize=buffersize,
            flat=True,
        ):
            yield decode(segment)
    else:
        # reduce memory overhead by processing chunks of up to
        # buffersize of segments because ThreadPoolExecutor.map is not
        # collecting iterables lazily
        with ThreadPoolExecutor(maxworkers) as executor:
            for chunk in fh.read_segments(
                self.dataoffsets,
                self.databytecounts,
                lock=lock,
                sort=sort,
                buffersize=buffersize,
                flat=False,
            ):
                yield from executor.map(decode, chunk)


def asarray(
    self,
    *,
    out: OutputType = None,
    squeeze: bool = True,
    lock: threading.RLock | NullContext | None = None,
    maxworkers: int | None = None,
    buffersize: int | None = None,
) -> NDArray[Any]:
    """Return image from page as NumPy array.

    Parameters:
        out:
            Specifies how image array is returned.
            By default, a new NumPy array is created.
            If a *numpy.ndarray*, a writable array to which the image
            is copied.
            If *'memmap'*, directly memory-map the image data in the
            file if possible; else create a memory-mapped array in a
            temporary file.
            If a *string* or *open file*, the file used to create a
            memory-mapped array.
        squeeze:
            Remove all length-1 dimensions (except X and Y) from
            image array.
            If *False*, return the image array with normalized
            5-dimensional shape :py:attr:`TiffPage.shaped`.
        lock:
            Reentrant lock to synchronize seeks and reads from file.
            The default is the lock of the parent's file handle.
        maxworkers:
            Maximum number of threads to concurrently decode segments.
            If *None* or *0*, use up to :py:attr:`_TIFF.MAXWORKERS`
            threads. See remarks in :py:meth:`TiffFile.asarray`.
        buffersize:
            Approximate number of bytes to read from file in one pass.
            The default is :py:attr:`_TIFF.BUFFERSIZE`.

    Returns:
        NumPy array of decompressed, unpredicted, and unpacked image data
        read from Strip/Tile Offsets/ByteCounts, formatted according to
        shape and dtype metadata found in tags and arguments.
        Photometric conversion, premultiplied alpha, orientation, and
        colorimetry corrections are not applied.
        Specifically, CMYK images are not converted to RGB, MinIsWhite
        images are not inverted, color palettes are not applied,
        gamma is not corrected, and CFA images are not demosaiced.
        Exceptions are YCbCr JPEG compressed images, which are converted
        to RGB.

    Raises:
        ValueError:
            Format of image in file is not supported and cannot be
            decoded.
        TiffFileError:
            The page has no data offsets.
        NotImplementedError:
            The image is stored contiguously but is chroma subsampled.

    """
    keyframe = self.keyframe  # self or keyframe

    if (
        not keyframe.shaped
        or product(keyframe.shaped) == 0
        or keyframe._dtype is None
    ):
        return numpy.empty((0,), keyframe.dtype)

    if len(self.dataoffsets) == 0:
        raise TiffFileError('missing data offset')

    fh = self.parent.filehandle
    if lock is None:
        lock = fh.lock

    def reopen() -> bool:
        # reopen a closed file handle; the caller must hold the lock.
        # return True if the handle was reopened and must be closed again
        if not fh.closed:
            return False
        warnings.warn(
            f'{self!r} reading array from closed file',
            UserWarning,
            stacklevel=2,
        )
        fh.open()
        return True

    reopened = False
    try:
        if (
            isinstance(out, str)
            and out == 'memmap'
            and keyframe.is_memmappable
        ):
            # direct memory map array in file
            with lock:
                reopened = reopen()
                result = fh.memmap_array(
                    keyframe.parent.byteorder + keyframe._dtype.char,
                    keyframe.shaped,
                    offset=self.dataoffsets[0],
                )

        elif keyframe.is_contiguous:
            # read contiguous bytes to array
            if keyframe.is_subsampled:
                raise NotImplementedError('chroma subsampling not supported')
            if out is not None:
                out = create_output(out, keyframe.shaped, keyframe._dtype)
            with lock:
                reopened = reopen()
                fh.seek(self.dataoffsets[0])
                result = fh.read_array(
                    keyframe.parent.byteorder + keyframe._dtype.char,
                    product(keyframe.shaped),
                    out=out,
                )
            if keyframe.fillorder == 2:
                result = imagecodecs.bitorder_decode(result, out=result)
            if keyframe.predictor != 1:
                # predictors without compression.
                # floatpred cannot decode in-place, so always decode to a
                # new array and copy it back into the result buffer
                unpredict = TIFF.UNPREDICTORS[keyframe.predictor]
                decoded = unpredict(result, axis=-2, out=None)
                result[:] = decoded

        elif (
            keyframe.jpegheader is not None
            and keyframe is self
            and 273 in self.tags  # striped ...
            and self.is_tiled  # but reported as tiled
            # TODO: imagecodecs can decode larger JPEG
            and self.imagewidth <= 65500
            and self.imagelength <= 65500
        ):
            # decode the whole NDPI JPEG strip
            with lock:
                reopened = reopen()
                fh.seek(self.tags[273].value[0])  # StripOffsets
                data = fh.read(self.tags[279].value[0])  # StripByteCounts
            decompress = TIFF.DECOMPRESSORS[self.compression]
            result = decompress(
                data,
                bitspersample=self.bitspersample,
                out=out,
                # shape=(self.imagelength, self.imagewidth)
            )
            del data

        else:
            # decode individual strips or tiles
            with lock:
                reopened = reopen()
                keyframe.decode  # init TiffPage.decode function under lock

            result = create_output(out, keyframe.shaped, keyframe._dtype)

            def func(
                decoderesult: tuple[
                    NDArray[Any] | None,
                    tuple[int, int, int, int, int],
                    tuple[int, int, int, int],
                ],
                keyframe: TiffPage = keyframe,
                out: NDArray[Any] = result,
            ) -> None:
                # copy decoded segments to output array
                segment, (s, d, h, w, _), shape = decoderesult
                if segment is None:
                    out[
                        s,
                        d : d + shape[0],
                        h : h + shape[1],
                        w : w + shape[2],
                    ] = keyframe.nodata
                else:
                    # crop segments that extend beyond the image bounds
                    out[
                        s,
                        d : d + shape[0],
                        h : h + shape[1],
                        w : w + shape[2],
                    ] = segment[
                        : keyframe.imagedepth - d,
                        : keyframe.imagelength - h,
                        : keyframe.imagewidth - w,
                    ]
                # except IndexError:
                #     pass  # corrupted file, for example, with too many strips

            for _ in self.segments(
                func=func,
                lock=lock,
                maxworkers=maxworkers,
                buffersize=buffersize,
                sort=True,
                _fullsize=False,
            ):
                pass

        result.shape = keyframe.shaped
        if squeeze:
            try:
                result.shape = keyframe.shape
            except ValueError as exc:
                logger().warning(
                    f'{self!r} failed to reshape '
                    f'{result.shape} to {keyframe.shape}, raised {exc!r:.128}'
                )
    finally:
        # restore the closed state of the file, also if reading failed
        if reopened:
            fh.close()
    return result
def asrgb(
    self,
    *,
    uint8: bool = False,
    alpha: Container[int] | None = None,
    **kwargs: Any,
) -> NDArray[Any]:
    """Return image as RGB(A). Work in progress. Do not use.

    Parameters:
        uint8:
            Reduce palette colormaps with values larger than 255 to
            8-bit before applying them.
        alpha:
            Extra sample types that are accepted as alpha channel.
            The first extra sample whose type is in `alpha` is appended
            to the RGB samples. By default, any ``EXTRASAMPLE`` type.
        **kwargs:
            Passed to :py:meth:`TiffPage.asarray`.

    Raises:
        ValueError:
            Palette image has no colormap or colormap cannot be applied.
        NotImplementedError:
            Photometric interpretation is not PALETTE or RGB.

    :meta private:

    """
    data = self.asarray(**kwargs)
    keyframe = self.keyframe  # self or keyframe

    if keyframe.photometric == PHOTOMETRIC.PALETTE:
        colormap = keyframe.colormap
        if colormap is None:
            raise ValueError('no colormap')
        if (
            colormap.shape[1] < 2**keyframe.bitspersample
            or keyframe.dtype is None
            or keyframe.dtype.char not in 'BH'
        ):
            raise ValueError('cannot apply colormap')
        if uint8:
            if colormap.max() > 255:
                # do not shift in-place: the array is the Colormap tag value
                colormap = colormap >> 8
            colormap = colormap.astype('uint8')
        if 'S' in keyframe.axes:
            data = data[..., 0] if keyframe.planarconfig == 1 else data[0]
        data = apply_colormap(data, colormap)

    elif keyframe.photometric == PHOTOMETRIC.RGB:
        if keyframe.extrasamples:
            if alpha is None:
                alpha = EXTRASAMPLE
            for i, exs in enumerate(keyframe.extrasamples):
                # honor the caller's selection of acceptable alpha types
                if exs in alpha:
                    if keyframe.planarconfig == 1:
                        data = data[..., [0, 1, 2, 3 + i]]
                    else:
                        # NOTE(review): indexes the second axis of planar
                        # data; confirm against the layout asarray returns
                        data = data[:, [0, 1, 2, 3 + i]]
                    break
        else:
            if keyframe.planarconfig == 1:
                data = data[..., :3]
            else:
                data = data[:, :3]
        # TODO: convert to uint8?

    elif keyframe.photometric == PHOTOMETRIC.MINISBLACK:
        raise NotImplementedError
    elif keyframe.photometric == PHOTOMETRIC.MINISWHITE:
        raise NotImplementedError
    elif keyframe.photometric == PHOTOMETRIC.SEPARATED:
        raise NotImplementedError
    else:
        raise NotImplementedError
    return data


def _gettags(
    self,
    codes: Container[int] | None = None,
    /,
    lock: threading.RLock | None = None,
) -> list[tuple[int, TiffTag]]:
    """Return list of (code, TiffTag).

    Parameters:
        codes: Codes of tags to return. By default, all tags are returned.
        lock: Not used by this implementation.

    """
    return [
        (tag.code, tag)
        for tag in self.tags
        if codes is None or tag.code in codes
    ]


def _nextifd(self) -> int:
    """Return offset to next IFD from file."""
    fh = self.parent.filehandle
    tiff = self.parent.tiff
    # read the number of tags stored at the start of this IFD
    fh.seek(self.offset)
    tagno = struct.unpack(tiff.tagnoformat, fh.read(tiff.tagnosize))[0]
    # the next-IFD offset is read from right after the tag entries
    fh.seek(self.offset + tiff.tagnosize + tagno * tiff.tagsize)
    return int(
        struct.unpack(tiff.offsetformat, fh.read(tiff.offsetsize))[0]
    )


def aspage(self) -> TiffPage:
    """Return TiffPage instance."""
    return self


@property
def index(self) -> int:
    """Index of page in IFD chain."""
    return self._index[-1]


@property
def treeindex(self) -> tuple[int, ...]:
    """Index of page in IFD tree."""
    return self._index


@property
def keyframe(self) -> TiffPage:
    """Self."""
    return self


@keyframe.setter
def keyframe(self, index: TiffPage) -> None:
    # assignments are ignored; the getter always returns self
    return


@property
def name(self) -> str:
    """Name of image array."""
    index = self._index if len(self._index) > 1 else self._index[0]
    return f'TiffPage {index}'


@property
def ndim(self) -> int:
    """Number of dimensions in image array."""
    return len(self.shape)


@cached_property
def dims(self) -> tuple[str, ...]:
    """Names of dimensions in image array."""
    names = TIFF.AXES_NAMES
    return tuple(names[ax] for ax in self.axes)


@cached_property
def sizes(self) -> dict[str, int]:
    """Ordered map of dimension names to lengths."""
    shape = self.shape
    names = TIFF.AXES_NAMES
    return {names[ax]: shape[i] for i, ax in enumerate(self.axes)}
@cached_property
def attr(self) -> dict[str, Any]:
    """Arbitrary metadata associated with image array.

    Currently an empty dictionary.
    """
    # TODO: what to return?
    return {}


@cached_property
def size(self) -> int:
    """Number of elements in image array."""
    shape = self.shape
    return product(shape)


@cached_property
def nbytes(self) -> int:
    """Number of bytes in image array, or 0 if the dtype is unknown."""
    if self.dtype is not None:
        return self.size * self.dtype.itemsize
    return 0


@property
def colormap(self) -> NDArray[numpy.uint16] | None:
    """Value of Colormap tag."""
    tags = self.tags
    return tags.valueof(320)


@property
def iccprofile(self) -> bytes | None:
    """Value of InterColorProfile tag."""
    tags = self.tags
    return tags.valueof(34675)


@property
def transferfunction(self) -> NDArray[numpy.uint16] | None:
    """Value of TransferFunction tag."""
    tags = self.tags
    return tags.valueof(301)


def get_resolution(
    self,
    unit: RESUNIT | int | str | None = None,
    scale: float | int | None = None,
) -> tuple[int | float, int | float]:
    """Return number of pixels per unit in X and Y dimensions.

    By default, the XResolution and YResolution tag values are returned.
    Missing tag values are set to 1.

    Parameters:
        unit:
            Unit of measurement of returned values.
            The default is the value of the ResolutionUnit tag.
        scale:
            Factor to convert resolution values to meter unit.
            The default is determined from the ResolutionUnit tag.

    """
    # factors converting a resolution per unit to a resolution per meter
    per_meter = {
        1: 1,  # meter, no unit
        2: 100 / 2.54,  # INCH
        3: 100,  # CENTIMETER
        4: 1000,  # MILLIMETER
        5: 1000000,  # MICROMETER
    }
    if unit is None:
        if scale is None:
            scale = 1
    else:
        unit = enumarg(RESUNIT, unit)
        try:
            if scale is None:
                scale = per_meter[self.tags.valueof(296, default=2)]
        except Exception as exc:
            # unknown ResolutionUnit: return the unscaled tag values
            logger().warning(
                f'{self!r} raised {exc!r:.128}'
            )
            scale = 1
        else:
            target = per_meter[unit]
            # keep integer scales where the division is exact
            if scale % target == 0:
                scale //= target
            else:
                scale /= target

    values: list[int | float] = []
    numerator: int
    denominator: int
    for code in (282, 283):  # XResolution, YResolution
        try:
            numerator, denominator = self.tags.valueof(code, default=(1, 1))
            if denominator == 0:
                value = numerator * scale
            elif numerator % denominator == 0:
                value = numerator // denominator * scale
            else:
                value = numerator / denominator * scale
        except Exception:
            value = 1
        values.append(value)
    return values[0], values[1]


@cached_property
def resolution(self) -> tuple[float, float]:
    """Number of pixels per resolutionunit in X and Y directions."""
    # values are returned in (somewhat unexpected) XY order to
    # keep symmetry with the TiffWriter.write resolution argument
    xres, yres = self.get_resolution()[:2]
    return float(xres), float(yres)


@property
def resolutionunit(self) -> int:
    """Unit of measurement for X and Y resolutions."""
    tags = self.tags
    return tags.valueof(296, default=2)


@property
def datetime(self) -> datetime.datetime | None:
    """Date and time of image creation.

    None if the tag is missing or its value cannot be parsed.
    """
    stamp = self.tags.valueof(306)
    if stamp is not None:
        try:
            return strptime(stamp)
        except Exception:
            return None
    return None
@cached_property
def chunks(self) -> tuple[int, ...]:
    """Shape of images in tiles or strips."""
    if self.is_tiled:
        plane = (self.tilelength, self.tilewidth)
    else:
        plane = (self.rowsperstrip, self.imagewidth)
    dims: list[int] = [self.tiledepth] if self.tiledepth > 1 else []
    dims.extend(plane)
    # contiguous samples are part of each chunk
    if self.planarconfig == 1 and self.samplesperpixel > 1:
        dims.append(self.samplesperpixel)
    return tuple(dims)


@cached_property
def chunked(self) -> tuple[int, ...]:
    """Shape of chunked image."""

    def count(length: int, chunk: int) -> int:
        # number of chunks needed to cover length (ceiling division)
        return (length + chunk - 1) // chunk

    dims: list[int] = []
    # separate sample planes are chunked individually
    if self.planarconfig == 2 and self.samplesperpixel > 1:
        dims.append(self.samplesperpixel)
    if self.is_tiled:
        if self.imagedepth > 1:
            dims.append(count(self.imagedepth, self.tiledepth))
        dims.append(count(self.imagelength, self.tilelength))
        dims.append(count(self.imagewidth, self.tilewidth))
    else:
        if self.imagedepth > 1:
            dims.append(self.imagedepth)
        dims.append(count(self.imagelength, self.rowsperstrip))
        # strips span the full image width
        dims.append(1)
    if self.planarconfig == 1 and self.samplesperpixel > 1:
        dims.append(1)
    return tuple(dims)


@cached_property
def hash(self) -> int:
    """Checksum to identify pages in same series.

    Pages with the same hash can use the same decode function.
    The hash is calculated from the following properties:
    :py:attr:`TiffFile.tiff`,
    :py:attr:`TiffPage.shaped`,
    :py:attr:`TiffPage.rowsperstrip`,
    :py:attr:`TiffPage.tilewidth`,
    :py:attr:`TiffPage.tilelength`,
    :py:attr:`TiffPage.tiledepth`,
    :py:attr:`TiffPage.sampleformat`,
    :py:attr:`TiffPage.bitspersample`,
    :py:attr:`TiffPage.fillorder`,
    :py:attr:`TiffPage.predictor`,
    :py:attr:`TiffPage.compression`,
    :py:attr:`TiffPage.extrasamples`, and
    :py:attr:`TiffPage.photometric`.

    """
    key = self.shaped + (
        self.parent.tiff,
        self.rowsperstrip,
        self.tilewidth,
        self.tilelength,
        self.tiledepth,
        self.sampleformat,
        self.bitspersample,
        self.fillorder,
        self.predictor,
        self.compression,
        self.extrasamples,
        self.photometric,
    )
    return hash(key)


@cached_property
def pages(self) -> TiffPages | None:
    """Sequence of sub-pages, SubIFDs."""
    if 330 in self.tags:
        return TiffPages(self, index=self.index)
    return None


@cached_property
def maxworkers(self) -> int:
    """Maximum number of threads for decoding segments.

    A value of 0 disables multi-threading also when stacking pages.

    """
    if self.is_contiguous or self.dtype is None:
        return 0
    nsegments_limit = TIFF.MAXWORKERS
    if self.compression in TIFF.IMAGE_COMPRESSIONS:
        return min(nsegments_limit, len(self.dataoffsets))
    segmentsize = product(self.chunks) * self.dtype.itemsize
    if segmentsize < 2048:
        # disable multi-threading for small segments
        return 0
    if self.compression == 5 and segmentsize < 14336:
        # disable multi-threading for small LZW compressed segments
        return 0
    if len(self.dataoffsets) < 4:
        return 1
    needs_decoding = (
        self.compression != 1 or self.fillorder != 1 or self.predictor != 1
    )
    if needs_decoding and imagecodecs is not None:
        return min(TIFF.MAXWORKERS, len(self.dataoffsets))
    return 2  # optimum for large number of uncompressed tiles


@cached_property
def is_contiguous(self) -> bool:
    """Image data is stored contiguously.

    Contiguous image data can be read from
    ``offset=TiffPage.dataoffsets[0]`` with ``size=TiffPage.nbytes``.
    Excludes prediction and fillorder.

    """
    if self.sampleformat == 5:
        return False
    if self.compression != 1:
        return False
    if self.bitspersample not in {8, 16, 32, 64}:
        return False
    tags = self.tags
    if 322 in tags:  # TileWidth
        if (
            self.imagewidth != self.tilewidth
            or self.imagelength % self.tilelength
            or self.tilewidth % 16
            or self.tilelength % 16
        ):
            return False
        if (
            32997 in tags  # ImageDepth
            and 32998 in tags  # TileDepth
            and (
                self.imagelength != self.tilelength
                or self.imagedepth % self.tiledepth
            )
        ):
            return False
    offsets = self.dataoffsets
    bytecounts = self.databytecounts
    nsegments = len(offsets)
    if nsegments == 0:
        return False
    if nsegments == 1:
        return True
    if self.is_stk or self.is_lsm:
        return True
    if sum(bytecounts) != self.nbytes:
        return False
    # every segment must be non-empty and end where the next one starts
    for i in range(nsegments - 1):
        if bytecounts[i] == 0:
            return False
        if offsets[i] + bytecounts[i] != offsets[i + 1]:
            return False
    return True


@cached_property
def is_final(self) -> bool:
    """Image data are stored in final form. Excludes byte-swapping."""
    if not self.is_contiguous:
        return False
    if self.fillorder != 1 or self.predictor != 1:
        return False
    return not self.is_subsampled
def __repr__(self) -> str:
    """Return short identification of page: index and offset in file."""
    index = self._index if len(self._index) > 1 else self._index[0]
    # the literal had been reduced to an empty string, which left log
    # messages prefixed with {self!r} without any page identification
    return f'<tifffile.TiffPage {index} @{self.offset}>'


def __str__(self) -> str:
    """Return one-line information about TiffPage."""
    return self._str()


def _str(self, detail: int = 0, width: int = 79) -> str:
    """Return string containing information about TiffPage.

    Parameters:
        detail:
            Level of detail. If 0 or less, return a single line.
            Tags are appended if larger than 0, NDPI tags if larger
            than 1, and image data if larger than 3.
        width:
            Maximum line width passed to the tag and data formatters.

    """
    if self.keyframe != self:
        return TiffFrame._str(self, detail, width)  # type: ignore
    # name of the first storage property that is set, else a false value
    attr = ''
    for name in ('memmappable', 'final', 'contiguous'):
        attr = getattr(self, 'is_' + name)
        if attr:
            attr = name.upper()
            break

    def tostr(name: str, /, skip: int = 1) -> str:
        # return name of enum attribute, or empty string if it equals skip
        obj = getattr(self, name)
        if obj == skip:
            return ''
        try:
            value = getattr(obj, 'name')
        except AttributeError:
            return ''
        return str(value)

    info = ' '.join(
        s.lower()
        for s in (
            'x'.join(str(i) for i in self.shape),
            f'{SAMPLEFORMAT(self.sampleformat).name}{self.bitspersample}',
            ' '.join(
                i
                for i in (
                    PHOTOMETRIC(self.photometric).name,
                    'REDUCED' if self.is_reduced else '',
                    'MASK' if self.is_mask else '',
                    'TILED' if self.is_tiled else '',
                    tostr('compression'),
                    tostr('planarconfig'),
                    tostr('predictor'),
                    tostr('fillorder'),
                )
                + (attr,)
                if i
            ),
            '|'.join(f.upper() for f in sorted(self.flags)),
        )
        if s
    )
    index = self._index if len(self._index) > 1 else self._index[0]
    info = f'TiffPage {index} @{self.offset} {info}'
    if detail <= 0:
        return info
    info_list = [info, self.tags._str(detail + 1, width=width)]
    if detail > 1:
        for name in ('ndpi',):
            name = name + '_tags'
            attr = getattr(self, name, '')
            if attr:
                info_list.append(
                    '{}\n{}'.format(
                        name.upper(),
                        pformat(attr, width=width, height=detail * 8),
                    )
                )
    if detail > 3:
        # best effort: omit image data that cannot be read
        try:
            info_list.append(
                'DATA\n{}'.format(
                    pformat(self.asarray(), width=width, height=detail * 8)
                )
            )
        except Exception:
            pass
    return '\n\n'.join(info_list)


def _sample_names(self) -> list[str] | None:
    """Return names of samples.

    Return None if the image has no sample axis, has more than two extra
    samples, has an unsupported photometric interpretation, or if the
    number of names does not match the number of samples.

    """
    if 'S' not in self.axes:
        return None
    samples = self.shape[self.axes.find('S')]
    extrasamples = len(self.extrasamples)
    if samples < 1 or extrasamples > 2:
        return None
    if self.photometric == 0:
        names = ['WhiteIsZero']
    elif self.photometric == 1:
        names = ['BlackIsZero']
    elif self.photometric == 2:
        names = ['Red', 'Green', 'Blue']
    elif self.photometric == 5:
        names = ['Cyan', 'Magenta', 'Yellow', 'Black']
    elif self.photometric == 6:
        if self.compression in {6, 7, 34892, 33007}:
            # YCBCR -> RGB for JPEG
            names = ['Red', 'Green', 'Blue']
        else:
            names = ['Luma', 'Cb', 'Cr']
    else:
        return None
    if extrasamples > 0:
        names += [enumarg(EXTRASAMPLE, self.extrasamples[0]).name.title()]
    if extrasamples > 1:
        names += [enumarg(EXTRASAMPLE, self.extrasamples[1]).name.title()]
    if len(names) != samples:
        return None
    return names


@cached_property
def flags(self) -> set[str]:
    r"""Set of ``is\_\*`` properties that are True."""
    return {
        name.lower()
        for name in TIFF.PAGE_FLAGS
        if getattr(self, 'is_' + name)
    }


@cached_property
def andor_tags(self) -> dict[str, Any] | None:
    """Consolidated metadata from Andor tags.

    None if the page is not an Andor page. The value of tag 4864 is
    stored as 'Id'. Tags with codes between 4864 and 5031 (exclusive)
    are stored under their names with the first five characters removed
    if the name is longer than five characters.

    """
    if not self.is_andor:
        return None
    result = {'Id': self.tags[4864].value}  # AndorId
    for tag in self.tags:  # list(self.tags.values()):
        code = tag.code
        if not 4864 < code < 5031:
            continue
        name = tag.name
        name = name[5:] if len(name) > 5 else name
        result[name] = tag.value
        # del self.tags[code]
    return result
""" if not self.is_epics: return None result = {} for tag in self.tags: # list(self.tags.values()): code = tag.code if not 65000 <= code < 65500: continue value = tag.value if code == 65000: # not a POSIX timestamp # https://github.com/bluesky/area-detector-handlers/issues/20 result['timeStamp'] = float(value) elif code == 65001: result['uniqueID'] = int(value) elif code == 65002: result['epicsTSSec'] = int(value) elif code == 65003: result['epicsTSNsec'] = int(value) else: key, value = value.split(':', 1) result[key] = astype(value) # del self.tags[code] return result @cached_property def ndpi_tags(self) -> dict[str, Any] | None: """Consolidated metadata from Hamamatsu NDPI tags.""" # TODO: parse 65449 ini style comments if not self.is_ndpi: return None tags = self.tags result = {} for name in ('Make', 'Model', 'Software'): result[name] = tags[name].value for code, name in TIFF.NDPI_TAGS.items(): if code in tags: result[name] = tags[code].value # del tags[code] if 'McuStarts' in result: mcustarts = result['McuStarts'] if 'McuStartsHighBytes' in result: high = result['McuStartsHighBytes'].astype('uint64') high <<= 32 mcustarts = mcustarts.astype('uint64') mcustarts += high del result['McuStartsHighBytes'] result['McuStarts'] = mcustarts return result @cached_property def geotiff_tags(self) -> dict[str, Any] | None: """Consolidated metadata from GeoTIFF tags.""" if not self.is_geotiff: return None tags = self.tags gkd = tags.valueof(34735) # GeoKeyDirectoryTag if gkd is None or len(gkd) < 2 or gkd[0] != 1: logger().warning(f'{self!r} invalid GeoKeyDirectoryTag') return {} result = { 'KeyDirectoryVersion': gkd[0], 'KeyRevision': gkd[1], 'KeyRevisionMinor': gkd[2], # 'NumberOfKeys': gkd[3], } # deltags = ['GeoKeyDirectoryTag'] geokeys = TIFF.GEO_KEYS geocodes = TIFF.GEO_CODES for index in range(gkd[3]): try: keyid, tagid, count, offset = gkd[ 4 + index * 4 : index * 4 + 8 ] except Exception as exc: logger().warning( f'{self!r} corrupted GeoKeyDirectoryTag ' f'raised 
{exc!r:.128}' ) continue if tagid == 0: value = offset else: try: value = tags[tagid].value[offset : offset + count] except TiffFileError as exc: logger().warning( f'{self!r} corrupted GeoKeyDirectoryTag {tagid} ' f'raised {exc!r:.128}' ) continue except KeyError as exc: logger().warning( f'{self!r} GeoKeyDirectoryTag {tagid} not found, ' f'raised {exc!r:.128}' ) continue if tagid == 34737 and count > 1 and value[-1] == '|': value = value[:-1] value = value if count > 1 else value[0] if keyid in geocodes: try: value = geocodes[keyid](value) except Exception: pass try: key = geokeys(keyid).name except ValueError: key = keyid result[key] = value value = tags.valueof(33920) # IntergraphMatrixTag if value is not None: value = numpy.array(value) if value.size == 16: value = value.reshape((4, 4)).tolist() result['IntergraphMatrix'] = value value = tags.valueof(33550) # ModelPixelScaleTag if value is not None: result['ModelPixelScale'] = numpy.array(value).tolist() value = tags.valueof(33922) # ModelTiepointTag if value is not None: value = numpy.array(value).reshape((-1, 6)).squeeze().tolist() result['ModelTiepoint'] = value value = tags.valueof(34264) # ModelTransformationTag if value is not None: value = numpy.array(value).reshape((4, 4)).tolist() result['ModelTransformation'] = value # if 33550 in tags and 33922 in tags: # sx, sy, sz = tags[33550].value # ModelPixelScaleTag # tiepoints = tags[33922].value # ModelTiepointTag # transforms = [] # for tp in range(0, len(tiepoints), 6): # i, j, k, x, y, z = tiepoints[tp : tp + 6] # transforms.append( # [ # [sx, 0.0, 0.0, x - i * sx], # [0.0, -sy, 0.0, y + j * sy], # [0.0, 0.0, sz, z - k * sz], # [0.0, 0.0, 0.0, 1.0], # ] # ) # if len(tiepoints) == 6: # transforms = transforms[0] # result['ModelTransformation'] = transforms rpcc = tags.valueof(50844) # RPCCoefficientTag if rpcc is not None: result['RPCCoefficient'] = { 'ERR_BIAS': rpcc[0], 'ERR_RAND': rpcc[1], 'LINE_OFF': rpcc[2], 'SAMP_OFF': rpcc[3], 'LAT_OFF': rpcc[4], 
'LONG_OFF': rpcc[5], 'HEIGHT_OFF': rpcc[6], 'LINE_SCALE': rpcc[7], 'SAMP_SCALE': rpcc[8], 'LAT_SCALE': rpcc[9], 'LONG_SCALE': rpcc[10], 'HEIGHT_SCALE': rpcc[11], 'LINE_NUM_COEFF': rpcc[12:33], 'LINE_DEN_COEFF ': rpcc[33:53], 'SAMP_NUM_COEFF': rpcc[53:73], 'SAMP_DEN_COEFF': rpcc[73:], } return result @cached_property def shaped_description(self) -> str | None: """Description containing array shape if exists, else None.""" for description in (self.description, self.description1): if not description or '"mibi.' in description: return None if description[:1] == '{' and '"shape":' in description: return description if description[:6] == 'shape=': return description return None @cached_property def imagej_description(self) -> str | None: """ImageJ description if exists, else None.""" for description in (self.description, self.description1): if not description: return None if description[:7] == 'ImageJ=' or description[:7] == 'SCIFIO=': return description return None @cached_property def is_jfif(self) -> bool: """JPEG compressed segments contain JFIF metadata.""" if ( self.compression not in {6, 7, 34892, 33007} or len(self.dataoffsets) < 1 or self.dataoffsets[0] == 0 or len(self.databytecounts) < 1 or self.databytecounts[0] < 11 ): return False fh = self.parent.filehandle fh.seek(self.dataoffsets[0] + 6) data = fh.read(4) return data == b'JFIF' # or data == b'Exif' @property def is_frame(self) -> bool: """Object is :py:class:`TiffFrame` instance.""" return False @property def is_virtual(self) -> bool: """Page does not have IFD structure in file.""" return False @property def is_subifd(self) -> bool: """Page is SubIFD of another page.""" return len(self._index) > 1 @property def is_reduced(self) -> bool: """Page is reduced image of another image.""" return bool(self.subfiletype & 0b1) @property def is_multipage(self) -> bool: """Page is part of multi-page image.""" return bool(self.subfiletype & 0b10) @property def is_mask(self) -> bool: """Page is transparency mask for 
@property
def is_mask(self) -> bool:
    """Page is transparency mask for another image."""
    return bool(self.subfiletype & 0x4)


@property
def is_mrc(self) -> bool:
    """Page is part of Mixed Raster Content."""
    return bool(self.subfiletype & 0x8)


@property
def is_tiled(self) -> bool:
    """Page contains tiled image."""
    width = self.tilewidth
    return width > 0  # alternative: 322 in self.tags  # TileWidth


@property
def is_subsampled(self) -> bool:
    """Page contains chroma subsampled image."""
    if self.subsampling is None:
        # no explicit subsampling: decide by photometric interpretation
        return self.photometric == 6  # YCbCr
    return self.subsampling != (1, 1)
    # RGB JPEG usually stored as subsampled YCbCr
    # self.compression == 7
    # and self.photometric == 2
    # and self.planarconfig == 1


@property
def is_imagej(self) -> bool:
    """Page contains ImageJ description metadata."""
    found = self.imagej_description
    return found is not None


@property
def is_shaped(self) -> bool:
    """Page contains Tifffile JSON metadata."""
    found = self.shaped_description
    return found is not None


@property
def is_mdgel(self) -> bool:
    """Page contains MDFileTag tag."""
    if 37701 in self.tags:  # AgilentBinary
        return False
    return 33445 in self.tags  # MDFileTag


@property
def is_agilent(self) -> bool:
    """Page contains Agilent Technologies tags."""
    # tag 270 and 285 contain color names
    if 285 not in self.tags:
        return False
    return 37701 in self.tags  # AgilentBinary


@property
def is_mediacy(self) -> bool:
    """Page contains Media Cybernetics Id tag."""
    tag = self.tags.get(50288)  # MC_Id
    if tag is None:
        return False
    try:
        return tag.value[:7] == b'MC TIFF'
    except Exception:
        # best effort: an unreadable or unsliceable value is not a match
        return False


@property
def is_stk(self) -> bool:
    """Page contains UIC1Tag tag."""
    return 33628 in self.tags


@property
def is_lsm(self) -> bool:
    """Page contains CZ_LSMINFO tag."""
    return 34412 in self.tags


@property
def is_fluoview(self) -> bool:
    """Page contains FluoView MM_STAMP tag."""
    return 34362 in self.tags


@property
def is_nih(self) -> bool:
    """Page contains NIHImageHeader tag."""
    return 43314 in self.tags


@property
def is_volumetric(self) -> bool:
    """Page contains SGI ImageDepth tag with value > 1."""
    depth = self.imagedepth
    return depth > 1
@property
def is_vista(self) -> bool:
    """Software tag is 'ISS Vista'."""
    return self.software == 'ISS Vista'


@property
def is_metaseries(self) -> bool:
    """Page contains MDS MetaSeries metadata in ImageDescription tag.

    Only the first page (index 0) written by 'MetaSeries' software is
    considered. The description must be enclosed in a MetaData XML element.

    """
    if self.index != 0 or self.software != 'MetaSeries':
        return False
    d = self.description
    # an empty prefix/suffix matches every string, so the element markers
    # must be spelled out for this test to discriminate anything
    return d.startswith('<MetaData>') and d.endswith('</MetaData>')


@property
def is_ome(self) -> bool:
    """Page contains OME-XML in ImageDescription tag."""
    if self.index != 0 or not self.description:
        return False
    # only the tail of the description is inspected
    return self.description[-10:].strip().endswith('OME>')


@property
def is_scn(self) -> bool:
    """Page contains Leica SCN XML in ImageDescription tag."""
    if self.index != 0 or not self.description:
        return False
    # only the tail of the description is inspected; it must close the
    # scn root element
    return self.description[-10:].strip().endswith('</scn>')


@property
def is_micromanager(self) -> bool:
    """Page contains MicroManagerMetadata tag."""
    return 51123 in self.tags


@property
def is_andor(self) -> bool:
    """Page contains Andor Technology tags 4864-5030."""
    return 4864 in self.tags


@property
def is_pilatus(self) -> bool:
    """Page contains Pilatus tags."""
    return self.software[:8] == 'TVX TIFF' and self.description[:2] == '# '


@property
def is_epics(self) -> bool:
    """Page contains EPICS areaDetector tags."""
    return (
        self.description == 'EPICS areaDetector'
        or self.software == 'EPICS areaDetector'
    )


@property
def is_tvips(self) -> bool:
    """Page contains TVIPS metadata."""
    return 37706 in self.tags


@property
def is_fei(self) -> bool:
    """Page contains FEI_SFEG or FEI_HELIOS tags."""
    return 34680 in self.tags or 34682 in self.tags


@property
def is_sem(self) -> bool:
    """Page contains CZ_SEM tag."""
    return 34118 in self.tags


@property
def is_svs(self) -> bool:
    """Page contains Aperio metadata."""
    return self.description[:7] == 'Aperio '
@final
class TiffFrame:
    """Lightweight TIFF image file directory (IFD).

    The purpose of TiffFrame is to reduce resource usage and speed up reading
    image data from file compared to TiffPage.
    Properties other than `offset`, `index`, `dataoffsets`, `databytecounts`,
    `subifds`, and `jpegtables` are assumed to be identical with a specified
    TiffPage instance, the keyframe.
    TiffFrame instances have no `tags` property.
    Virtual frames just reference the image data in the file. They may not
    have an IFD structure in the file.

    TiffFrame instances are not thread-safe. All attributes are read-only.

    Parameters:
        parent:
            TiffFile instance to read frame from.
            The file handle position must be at an offset to an IFD structure.
            Only a limited number of tag values are read from file.
        index:
            Index of frame in IFD tree.
        offset:
            Position of frame in file.
        keyframe:
            TiffPage instance with same hash as frame.
        dataoffsets:
            Data offsets of "virtual frame".
        databytecounts:
            Data bytecounts of "virtual frame".

    """

    __slots__ = (
        'parent',
        'offset',
        'dataoffsets',
        'databytecounts',
        'subifds',
        'jpegtables',
        '_keyframe',
        '_index',
    )

    is_mdgel: bool = False
    pages: TiffPages | None = None
    # tags = {}

    parent: TiffFile
    """TiffFile instance frame belongs to."""

    offset: int
    """Position of frame in file."""

    dataoffsets: tuple[int, ...]
    """Positions of strips or tiles in file."""

    databytecounts: tuple[int, ...]
    """Size of strips or tiles in file."""

    subifds: tuple[int, ...] | None
    """Positions of SubIFDs in file."""

    jpegtables: bytes | None
    """JPEG quantization and/or Huffman tables."""

    _keyframe: TiffPage | None
    _index: tuple[int, ...]  # index of frame in IFD tree.

    def __init__(
        self,
        parent: TiffFile,
        /,
        index: int | Sequence[int],
        *,
        offset: int | None = None,
        keyframe: TiffPage | None = None,
        dataoffsets: tuple[int, ...] | None = None,
        databytecounts: tuple[int, ...] | None = None,
    ) -> None:
        self._keyframe = None
        self.parent = parent

        self.offset = int(offset) if offset else 0
        self.subifds = None
        self.jpegtables = None
        self.dataoffsets = ()
        self.databytecounts = ()
        if isinstance(index, int):
            self._index = (index,)
        else:
            self._index = tuple(index)

        if dataoffsets is not None and databytecounts is not None:
            # initialize "virtual frame" from offsets and bytecounts;
            # nothing is read from file in this case
            self.offset = 0 if offset is None else offset
            self.dataoffsets = dataoffsets
            self.databytecounts = databytecounts
            self._keyframe = keyframe
            return

        if offset is None:
            self.offset = parent.filehandle.tell()
        else:
            parent.filehandle.seek(offset)

        # select the tag codes to read from the IFD
        if keyframe is None:
            tags = {273, 279, 324, 325, 330, 347}
        elif keyframe.is_contiguous:
            # use databytecounts from keyframe
            tags = {256, 273, 324, 330}
            self.databytecounts = keyframe.databytecounts
        else:
            tags = {256, 273, 279, 324, 325, 330, 347}

        for code, tag in self._gettags(tags):
            if code in {273, 324}:
                self.dataoffsets = tag.value
            elif code in {279, 325}:
                self.databytecounts = tag.value
            elif code == 330:
                self.subifds = tag.value
            elif code == 347:
                self.jpegtables = tag.value
            elif keyframe is None or (
                code == 256 and keyframe.imagewidth != tag.value
            ):
                raise RuntimeError('incompatible keyframe')

        if not self.dataoffsets:
            logger().warning(f'{self!r} is missing required tags')
        elif keyframe is not None and len(self.dataoffsets) != len(
            keyframe.dataoffsets
        ):
            raise RuntimeError('incompatible keyframe')

        if keyframe is not None:
            # goes through the property setter, which validates again
            self.keyframe = keyframe

    def _gettags(
        self,
        codes: Container[int] | None = None,
        /,
        lock: threading.RLock | None = None,
    ) -> list[tuple[int, TiffTag]]:
        """Return list of (code, TiffTag) from file.

        Parameters:
            codes:
                Tag codes to return. If empty or None, all tags are returned.
            lock:
                Lock held while reading. By default, no locking is done.

        Raises:
            TiffFileError:
                The tag count cannot be read or exceeds 4096.

        """
        fh = self.parent.filehandle
        tiff = self.parent.tiff
        unpack = struct.unpack
        rlock: Any = NullContext() if lock is None else lock
        tags = []

        with rlock:
            fh.seek(self.offset)
            try:
                tagno = unpack(tiff.tagnoformat, fh.read(tiff.tagnosize))[0]
                if tagno > 4096:
                    raise ValueError(f'suspicious number of tags {tagno}')
            except Exception as exc:
                raise TiffFileError(
                    f'corrupted tag list @{self.offset}'
                ) from exc

            tagoffset = self.offset + tiff.tagnosize  # fh.tell()
            tagsize = tiff.tagsize
            codeformat = tiff.tagformat1[:2]
            # read all tag structures at once, then slice them
            tagbytes = fh.read(tagsize * tagno)

            for tagindex in range(0, tagsize * tagno, tagsize):
                code = unpack(codeformat, tagbytes[tagindex : tagindex + 2])[0]
                if codes and code not in codes:
                    continue
                try:
                    tag = TiffTag.fromfile(
                        self.parent,
                        offset=tagoffset + tagindex,
                        header=tagbytes[tagindex : tagindex + tagsize],
                    )
                except TiffFileError as exc:
                    # skip the unreadable tag but keep the remaining ones
                    logger().error(f'{self!r} raised {exc!r:.128}')
                    continue
                tags.append((code, tag))

        return tags

    def _nextifd(self) -> int:
        """Return offset to next IFD from file."""
        return TiffPage._nextifd(self)  # type: ignore

    def aspage(self) -> TiffPage:
        """Return TiffPage from file.

        Raise ValueError if frame is virtual.

        """
        if self.is_virtual:
            raise ValueError('cannot return virtual frame as page')
        fh = self.parent.filehandle
        closed = fh.closed
        if closed:
            # this is an inefficient resort in case a user calls aspage
            # of a TiffFrame with a closed FileHandle.
            warnings.warn(
                f'{self!r} reading TiffPage from closed file', UserWarning
            )
            fh.open()
        try:
            fh.seek(self.offset)
            page = TiffPage(self.parent, index=self.index)
        finally:
            if closed:
                fh.close()
        return page

    def asarray(self, *args: Any, **kwargs: Any) -> NDArray[Any]:
        """Return image from frame as NumPy array.

        Parameters:
            **kwargs: Arguments passed to :py:meth:`TiffPage.asarray`.

        """
        return TiffPage.asarray(self, *args, **kwargs)  # type: ignore

    def aszarr(self, **kwargs: Any) -> ZarrTiffStore:
        """Return image from frame as Zarr store.

        Parameters:
            **kwargs: Arguments passed to :py:class:`ZarrTiffStore`.

        """
        return ZarrTiffStore(self, **kwargs)

    def asrgb(self, *args: Any, **kwargs: Any) -> NDArray[Any]:
        """Return image from frame as RGB(A). Work in progress. Do not use.

        :meta private:

        """
        return TiffPage.asrgb(self, *args, **kwargs)  # type: ignore

    def segments(self, *args: Any, **kwargs: Any) -> Iterator[
        tuple[
            NDArray[Any] | None,
            tuple[int, int, int, int, int],
            tuple[int, int, int, int],
        ]
    ]:
        """Return iterator over decoded tiles or strips.

        Parameters:
            **kwargs: Arguments passed to :py:meth:`TiffPage.segments`.

        :meta private:

        """
        return TiffPage.segments(self, *args, **kwargs)  # type: ignore

    @property
    def index(self) -> int:
        """Index of frame in IFD chain."""
        return self._index[-1]

    @property
    def treeindex(self) -> tuple[int, ...]:
        """Index of frame in IFD tree."""
        return self._index

    @property
    def keyframe(self) -> TiffPage | None:
        """TiffPage with same properties as this frame."""
        return self._keyframe

    @keyframe.setter
    def keyframe(self, keyframe: TiffPage, /) -> None:
        if self._keyframe == keyframe:
            return
        if self._keyframe is not None:
            raise RuntimeError('cannot reset keyframe')
        if len(self.dataoffsets) != len(keyframe.dataoffsets):
            raise RuntimeError('incompatible keyframe')
        if keyframe.is_contiguous:
            self.databytecounts = keyframe.databytecounts
        self._keyframe = keyframe

    @property
    def is_frame(self) -> bool:
        """Object is :py:class:`TiffFrame` instance."""
        return True

    @property
    def is_virtual(self) -> bool:
        """Frame does not have IFD structure in file."""
        return self.offset <= 0

    @property
    def is_subifd(self) -> bool:
        """Frame is SubIFD of another page."""
        return len(self._index) > 1

    # The following members are delegated to the keyframe, which must be set.

    @property
    def is_final(self) -> bool:
        assert self._keyframe is not None
        return self._keyframe.is_final

    @property
    def is_contiguous(self) -> bool:
        assert self._keyframe is not None
        return self._keyframe.is_contiguous

    @property
    def is_memmappable(self) -> bool:
        assert self._keyframe is not None
        return self._keyframe.is_memmappable

    @property
    def hash(self) -> int:
        assert self._keyframe is not None
        return self._keyframe.hash

    @property
    def shape(self) -> tuple[int, ...]:
        assert self._keyframe is not None
        return self._keyframe.shape

    @property
    def shaped(self) -> tuple[int, int, int, int, int]:
        assert self._keyframe is not None
        return self._keyframe.shaped

    @property
    def chunks(self) -> tuple[int, ...]:
        assert self._keyframe is not None
        return self._keyframe.chunks

    @property
    def chunked(self) -> tuple[int, ...]:
        assert self._keyframe is not None
        return self._keyframe.chunked

    @property
    def tile(self) -> tuple[int, ...] | None:
        assert self._keyframe is not None
        return self._keyframe.tile

    @property
    def name(self) -> str:
        """Name of frame: 'TiffFrame' followed by its index or tree index."""
        index = self._index if len(self._index) > 1 else self._index[0]
        return f'TiffFrame {index}'

    @property
    def ndim(self) -> int:
        assert self._keyframe is not None
        return self._keyframe.ndim

    @property
    def dims(self) -> tuple[str, ...]:
        assert self._keyframe is not None
        return self._keyframe.dims

    @property
    def sizes(self) -> dict[str, int]:
        assert self._keyframe is not None
        return self._keyframe.sizes

    @property
    def coords(self) -> dict[str, NDArray[Any]]:
        assert self._keyframe is not None
        return self._keyframe.coords

    @property
    def size(self) -> int:
        assert self._keyframe is not None
        return self._keyframe.size

    @property
    def nbytes(self) -> int:
        assert self._keyframe is not None
        return self._keyframe.nbytes

    @property
    def dtype(self) -> numpy.dtype[Any] | None:
        assert self._keyframe is not None
        return self._keyframe.dtype

    @property
    def axes(self) -> str:
        assert self._keyframe is not None
        return self._keyframe.axes

    def get_resolution(
        self,
        unit: RESUNIT | int | None = None,
        scale: float | int | None = None,
    ) -> tuple[int | float, int | float]:
        assert self._keyframe is not None
        return self._keyframe.get_resolution(unit, scale)

    @property
    def resolution(self) -> tuple[float, float]:
        assert self._keyframe is not None
        return self._keyframe.resolution

    @property
    def resolutionunit(self) -> int:
        assert self._keyframe is not None
        return self._keyframe.resolutionunit

    @property
    def datetime(self) -> datetime.datetime | None:
        # TODO: TiffFrame.datetime can differ from TiffPage.datetime?
        assert self._keyframe is not None
        return self._keyframe.datetime

    @property
    def compression(self) -> int:
        assert self._keyframe is not None
        return self._keyframe.compression

    @property
    def decode(
        self,
    ) -> Callable[
        ...,
        tuple[
            NDArray[Any] | None,
            tuple[int, int, int, int, int],
            tuple[int, int, int, int],
        ],
    ]:
        assert self._keyframe is not None
        return self._keyframe.decode

    def __repr__(self) -> str:
        # identify the frame by index and file position; this string is
        # embedded in the log and warning messages of this class
        index = self._index if len(self._index) > 1 else self._index[0]
        return f'<tifffile.TiffFrame {index} @{self.offset}>'

    def __str__(self) -> str:
        return self._str()

    def _str(self, detail: int = 0, width: int = 79) -> str:
        """Return string containing information about TiffFrame."""
        if self._keyframe is None:
            info = ''
            kf = None
        else:
            shape = 'x'.join(str(i) for i in self.shape)
            info = f'{shape} {self.dtype}'
            kf = self._keyframe._str(width=width - 11)
        if detail > 3:
            # the detailed report replaces the short shape/dtype summary
            of = pformat(self.dataoffsets, width=width - 9, height=detail - 3)
            bc = pformat(
                self.databytecounts, width=width - 13, height=detail - 3
            )
            info = f'\n Keyframe {kf}\n Offsets {of}\n Bytecounts {bc}'
        index = self._index if len(self._index) > 1 else self._index[0]
        return f'TiffFrame {index} @{self.offset} {info}'
""" __slots__ = ( 'parent', 'offset', 'code', 'dtype', 'count', '_value', 'valueoffset', ) parent: TiffFile | TiffWriter """TIFF file tag belongs to.""" offset: int """Position of tag structure in file.""" code: int """Decimal code of tag.""" dtype: int """:py:class:`DATATYPE` of tag value item.""" count: int """Number of items in tag value.""" valueoffset: int """Position of tag value in file.""" _value: Any def __init__( self, parent: TiffFile | TiffWriter, offset: int, code: int, dtype: DATATYPE | int, count: int, value: Any, valueoffset: int, /, ) -> None: self.parent = parent self.offset = int(offset) self.code = int(code) self.count = int(count) self._value = value self.valueoffset = valueoffset try: self.dtype = DATATYPE(dtype) except ValueError: self.dtype = int(dtype) @classmethod def fromfile( cls, parent: TiffFile, /, *, offset: int | None = None, header: bytes | None = None, validate: bool = True, ) -> TiffTag: """Return TiffTag instance from file. Parameters: parent: TiffFile instance tag is read from. offset: Position of tag structure in file. The default is the position of the file handle. header: Tag structure as bytes. The default is read from the file. validate: Raise TiffFileError if data type or value offset are invalid. Raises: TiffFileError: Data type or value offset are invalid and `validate` is *True*. 
""" tiff = parent.tiff if header is None: if offset is None: offset = parent.filehandle.tell() else: parent.filehandle.seek(offset) header = parent.filehandle.read(tiff.tagsize) elif offset is None: offset = parent.filehandle.tell() valueoffset = offset + tiff.tagsize - tiff.tagoffsetthreshold code, dtype = struct.unpack(tiff.tagformat1, header[:4]) count, value = struct.unpack(tiff.tagformat2, header[4:]) try: valueformat = TIFF.DATA_FORMATS[dtype] except KeyError as exc: msg = ( f' ' f'invalid data type {dtype!r}' ) if validate: raise TiffFileError(msg) from exc logger().error(msg) return cls(parent, offset, code, dtype, count, None, 0) valuesize = count * struct.calcsize(valueformat) if ( valuesize > tiff.tagoffsetthreshold or code in TIFF.TAG_READERS # TODO: only works with offsets? ): valueoffset = struct.unpack(tiff.offsetformat, value)[0] if validate and code in TIFF.TAG_LOAD: value = TiffTag._read_value( parent, offset, code, dtype, count, valueoffset ) elif ( valueoffset < 8 or valueoffset + valuesize > parent.filehandle.size ): msg = ( f' ' f'invalid value offset {valueoffset}' ) if validate: raise TiffFileError(msg) logger().warning(msg) value = None elif code in TIFF.TAG_LOAD: value = TiffTag._read_value( parent, offset, code, dtype, count, valueoffset ) else: value = None elif dtype in {1, 2, 7}: # BYTES, ASCII, UNDEFINED value = value[:valuesize] elif ( tiff.is_ndpi and count == 1 and dtype in {4, 9, 13} and value[4:] != b'\x00\x00\x00\x00' ): # NDPI IFD or LONG, for example, in StripOffsets or StripByteCounts value = struct.unpack(' Any: """Read tag value from file.""" try: valueformat = TIFF.DATA_FORMATS[dtype] except KeyError as exc: raise TiffFileError( f' ' f'invalid data type {dtype!r}' ) from exc fh = parent.filehandle tiff = parent.tiff valuesize = count * struct.calcsize(valueformat) if valueoffset < 8 or valueoffset + valuesize > fh.size: raise TiffFileError( f' ' f'invalid value offset {valueoffset}' ) # if valueoffset % 2: # 
@staticmethod
def _process_value(
    value: Any, code: int, dtype: int, offset: int, /
) -> Any:
    """Process tag value.

    Parameters:
        value: Raw tag value.
        code: Code of tag, used to select enum and tuple handling.
        dtype: Data type of tag value item.
        offset: Position of tag structure in file, used in log messages.

    Returns:
        The value unchanged if it is None, of BYTE or UNDEFINED type,
        handled by a custom tag reader, or not bytes, str, or tuple.
        Otherwise ASCII values are decoded, enumerated values are converted
        where possible, and one-item tuples are unpacked unless the tag is
        registered as tuple-valued.

    """
    if (
        value is None
        or dtype == 1  # BYTE
        or dtype == 7  # UNDEFINED
        or code in TIFF.TAG_READERS
        or not isinstance(value, (bytes, str, tuple))
    ):
        return value

    if dtype == 2:
        # TIFF ASCII fields can contain multiple strings,
        # each terminated with a NUL
        try:
            value = bytes2str(
                stripnull(cast(bytes, value), first=False).strip()
            )
        except UnicodeDecodeError as exc:
            # keep the undecoded value but report which tag is affected
            logger().warning(
                f'<tifffile.TiffTag {code} @{offset}> '
                f'coercing invalid ASCII to bytes, due to {exc!r:.128}'
            )
        return value

    if code in TIFF.TAG_ENUM:
        t = TIFF.TAG_ENUM[code]
        try:
            value = tuple(t(v) for v in value)
        except ValueError as exc:
            if code not in {259, 317}:
                # ignore compression/predictor
                logger().warning(
                    f'<tifffile.TiffTag {code} @{offset}> '
                    f'raised {exc!r:.128}'
                )

    if len(value) == 1 and code not in TIFF.TAG_TUPLE:
        value = value[0]
    return value
@property
def value(self) -> Any:
    """Value of tag, delay-loaded from file if necessary.

    If no value is cached, it is read from the parent's file handle while
    holding the handle's lock, processed, and cached. A closed file handle
    is temporarily reopened, with a UserWarning.

    """
    if self._value is None:
        # print(
        #     f'_read_value {self.code} {TIFF.TAGS.get(self.code)} '
        #     f'{self.dtype}[{self.count}] @{self.valueoffset} '
        # )
        fh = self.parent.filehandle
        with fh.lock:
            closed = fh.closed
            if closed:
                # this is an inefficient resort in case a user delay loads
                # tag values from a TiffPage with a closed FileHandle.
                warnings.warn(
                    f'{self!r} reading value from closed file', UserWarning
                )
                fh.open()
            try:
                value = TiffTag._read_value(
                    self.parent,
                    self.offset,
                    self.code,
                    self.dtype,
                    self.count,
                    self.valueoffset,
                )
            finally:
                # restore the closed state found on entry
                if closed:
                    fh.close()
        self._value = TiffTag._process_value(
            value,
            self.code,
            self.dtype,
            self.offset,
        )
    return self._value

@value.setter
def value(self, value: Any, /) -> None:
    # replaces the cached value only; nothing is written to file
    self._value = value

@property
def dtype_name(self) -> str:
    """Name of data type of tag value."""
    try:
        return self.dtype.name  # type: ignore
    except AttributeError:
        # dtype has no name attribute (a plain int, for example)
        return f'TYPE{self.dtype}'

@property
def name(self) -> str:
    """Name of tag from :py:attr:`_TIFF.TAGS` registry.

    The decimal code as string is returned for unregistered codes.

    """
    return TIFF.TAGS.get(self.code, str(self.code))

@property
def dataformat(self) -> str:
    """Data type as `struct.pack` format."""
    return TIFF.DATA_FORMATS[self.dtype]

@property
def valuebytecount(self) -> int:
    """Number of bytes of tag value in file."""
    return self.count * struct.calcsize(TIFF.DATA_FORMATS[self.dtype])

def astuple(self) -> TagTuple:
    """Return tag code, dtype, count, and encoded value.

    The encoded value is read from file if necessary.

    Raises:
        ValueError:
            The value cannot be packed, the file is NDPI, and the packed
            count is 1.

    """
    if isinstance(self.value, bytes):
        value = self.value
    else:
        tiff = self.parent.tiff
        dataformat = TIFF.DATA_FORMATS[self.dtype]
        count = self.count * int(dataformat[0])
        fmt = f'{tiff.byteorder}{count}{dataformat[1]}'
        try:
            if self.dtype == 2:
                # ASCII
                value = struct.pack(fmt, self.value.encode('ascii'))
                if len(value) != count:
                    raise ValueError
            elif count == 1 and not isinstance(self.value, tuple):
                value = struct.pack(fmt, self.value)
            else:
                value = struct.pack(fmt, *self.value)
        except Exception as exc:
            if tiff.is_ndpi and count == 1:
                raise ValueError(
                    'cannot pack 64-bit NDPI value to 32-bit dtype'
                ) from exc
            # packing failed: fall back to the raw bytes stored in the file
            # and leave the file position where it was
            fh = self.parent.filehandle
            pos = fh.tell()
            fh.seek(self.valueoffset)
            value = fh.read(struct.calcsize(fmt))
            fh.seek(pos)
    # NOTE(review): meaning of the trailing True is defined by TagTuple,
    # which is not visible here - confirm
    return self.code, int(self.dtype), self.count, value, True

def overwrite(
    self,
    value: Any,
    /,
    *,
    dtype: DATATYPE | int | str | None = None,
    erase: bool = True,
) -> TiffTag:
    """Write new tag value to file and return new TiffTag instance.

    Warning: changing tag values in TIFF files might result in corrupted
    files or have unexpected side effects.

    The packed value is appended to the file if it is longer than the
    old value. The file position is left where it was.

    Overwriting tag values in NDPI files > 4 GB is only supported if
    single integer values and new offsets do not exceed the 32-bit range.

    Parameters:
        value:
            New tag value to write.
            Must be compatible with the `struct.pack` formats corresponding
            to the tag's data type.
        dtype:
            New tag data type. By default, the data type is not changed.
        erase:
            Overwrite previous tag values in file with zeros.

    Raises:
        struct.error:
            Value is not compatible with dtype or new offset exceeds
            TIFF size limit.
        ValueError:
            Invalid value or dtype, or old integer value in NDPI files
            exceeds 32-bit range.
        TypeError:
            `value` has a `filehandle` attribute (a TiffFile instance was
            passed as first argument).

    """
    if self.offset < 8 or self.valueoffset < 8:
        raise ValueError(f'cannot rewrite tag at offset {self.offset} < 8')

    if hasattr(value, 'filehandle'):
        # passing a TiffFile instance is deprecated and no longer required
        # since 2021.7.30
        raise TypeError(
            'TiffTag.overwrite got an unexpected TiffFile instance '
            'as first argument'
        )

    fh = self.parent.filehandle
    tiff = self.parent.tiff
    if tiff.is_ndpi:
        # only support files < 4GB
        if self.count == 1 and self.dtype in {4, 13}:
            if isinstance(self.value, tuple):
                v = self.value[0]
            else:
                v = self.value
            if v > 4294967295:
                raise ValueError('cannot patch NDPI > 4 GB files')
        # NDPI tags are written using the classic little-endian formats
        tiff = TIFF.CLASSIC_LE

    if value is None:
        value = b''
    # normalize dtype to a data type code
    if dtype is None:
        dtype = self.dtype
    elif isinstance(dtype, str):
        # drop a leading byte order character
        if len(dtype) > 1 and dtype[0] in '<>|=':
            dtype = dtype[1:]
        try:
            dtype = TIFF.DATA_DTYPES[dtype]
        except KeyError as exc:
            raise ValueError(f'unknown data type {dtype!r}') from exc
    else:
        dtype = enumarg(DATATYPE, dtype)

    packedvalue: bytes | None = None
    dataformat: str
    try:
        dataformat = TIFF.DATA_FORMATS[dtype]
    except KeyError as exc:
        raise ValueError(f'unknown data type {dtype!r}') from exc

    if dtype == 2:
        # strings
        if isinstance(value, str):
            # enforce 7-bit ASCII on Unicode strings
            try:
                value = value.encode('ascii')
            except UnicodeEncodeError as exc:
                raise ValueError(
                    'TIFF strings must be 7-bit ASCII'
                ) from exc
        elif not isinstance(value, bytes):
            raise ValueError('TIFF strings must be 7-bit ASCII')
        # make sure the string is NUL terminated; count includes the NUL
        if len(value) == 0 or value[-1:] != b'\x00':
            value += b'\x00'
        count = len(value)
        value = (value,)

    elif isinstance(value, bytes):
        # pre-packed binary data
        dtsize = struct.calcsize(dataformat)
        if len(value) % dtsize:
            raise ValueError('invalid packed binary data')
        count = len(value) // dtsize
        packedvalue = value
        value = (value,)

    else:
        try:
            count = len(value)
        except TypeError:
            # scalar value
            value = (value,)
            count = 1
        if dtype in {5, 10}:
            if count < 2 or count % 2:
                raise ValueError('invalid RATIONAL value')
            count //= 2  # rational

    if packedvalue is None:
        packedvalue = struct.pack(
            f'{tiff.byteorder}{count * int(dataformat[0])}{dataformat[1]}',
            *value,
        )
    newsize = len(packedvalue)
    oldsize = self.count * struct.calcsize(TIFF.DATA_FORMATS[self.dtype])
    valueoffset = self.valueoffset

    pos = fh.tell()
    try:
        if dtype != self.dtype:
            # rewrite data type
            fh.seek(self.offset + 2)
            fh.write(struct.pack(tiff.byteorder + 'H', dtype))

        # "inline" means the value fits into the tag structure itself,
        # that is, its size does not exceed tiff.tagoffsetthreshold
        if oldsize <= tiff.tagoffsetthreshold:
            if newsize <= tiff.tagoffsetthreshold:
                # inline -> inline: overwrite
                fh.seek(self.offset + 4)
                fh.write(struct.pack(tiff.tagformat2, count, packedvalue))
            else:
                # inline -> separate: append to file
                fh.seek(0, os.SEEK_END)
                valueoffset = fh.tell()
                if valueoffset % 2:
                    # value offset must begin on a word boundary
                    fh.write(b'\x00')
                    valueoffset += 1
                # write new offset
                fh.seek(self.offset + 4)
                fh.write(
                    struct.pack(
                        tiff.tagformat2,
                        count,
                        struct.pack(tiff.offsetformat, valueoffset),
                    )
                )
                # write new value
                fh.seek(valueoffset)
                fh.write(packedvalue)

        elif newsize <= tiff.tagoffsetthreshold:
            # separate -> inline: erase old value
            valueoffset = (
                self.offset + 4 + struct.calcsize(tiff.tagformat2[:2])
            )
            fh.seek(self.offset + 4)
            fh.write(struct.pack(tiff.tagformat2, count, packedvalue))
            if erase:
                fh.seek(self.valueoffset)
                fh.write(b'\x00' * oldsize)
        elif newsize <= oldsize or self.valueoffset + oldsize == fh.size:
            # separate -> separate smaller: overwrite, erase remaining
            # (also taken when the old value is the last data in the file)
            fh.seek(self.offset + 4)
            fh.write(struct.pack(tiff.tagformat2[:2], count))
            fh.seek(self.valueoffset)
            fh.write(packedvalue)
            if erase and oldsize - newsize > 0:
                fh.write(b'\x00' * (oldsize - newsize))
        else:
            # separate -> separate larger: erase old value, append to file
            fh.seek(0, os.SEEK_END)
            valueoffset = fh.tell()
            if valueoffset % 2:
                # value offset must begin on a word boundary
                fh.write(b'\x00')
                valueoffset += 1
            # write offset
            fh.seek(self.offset + 4)
            fh.write(
                struct.pack(
                    tiff.tagformat2,
                    count,
                    struct.pack(tiff.offsetformat, valueoffset),
                )
            )
            # write value
            fh.seek(valueoffset)
            fh.write(packedvalue)
            if erase:
                fh.seek(self.valueoffset)
                fh.write(b'\x00' * oldsize)

    finally:
        fh.seek(pos)  # must restore file position

    # the returned tag describes the state just written to file
    return TiffTag(
        self.parent,
        self.offset,
        self.code,
        dtype,
        count,
        value,
        valueoffset,
    )
def __init__(self) -> None:
    # the first mapping holds the first tag of every code; additional
    # mappings hold further tags with codes already present
    first: dict[int, TiffTag] = {}
    self._dict = first
    self._list = [first]


def add(self, tag: TiffTag, /) -> None:
    """Add tag.

    The tag is stored in the first mapping that does not yet contain its
    code. A new mapping is appended if all of them do.

    """
    code = tag.code
    for level in self._list:
        if code in level:
            continue
        level[code] = tag
        return
    self._list.append({code: tag})


def keys(self) -> list[int]:
    """Return codes of all tags."""
    return list(self._dict)


def values(self) -> list[TiffTag]:
    """Return all tags in order they are stored in file."""
    tags = [tag for level in self._list for tag in level.values()]
    tags.sort(key=lambda tag: tag.offset)
    return tags


def items(self) -> list[tuple[int, TiffTag]]:
    """Return all (code, tag) pairs in order tags are stored in file."""
    pairs = [pair for level in self._list for pair in level.items()]
    pairs.sort(key=lambda pair: pair[1].offset)
    return pairs


def valueof(
    self,
    key: int | str,
    /,
    default: Any = None,
    index: int | None = None,
) -> Any:
    """Return value of tag by code or name if exists, else default.

    Parameters:
        key:
            Code or name of tag to return.
        default:
            Another value to return if specified tag is corrupted or
            not found.
        index:
            Specifies tag in case of multiple tags with identical code.
            The default is the first tag.

    """
    tag = self.get(key, default=None, index=index)
    if tag is not None:
        try:
            return tag.value
        except TiffFileError:
            pass  # corrupted tag
    return default
The default is the first tag. """ if index is None: if key in self._dict: return self._dict[cast(int, key)] if not isinstance(key, str): return default index = 0 try: tags = self._list[index] except IndexError: return default if key in tags: return tags[cast(int, key)] if not isinstance(key, str): return default for tag in tags.values(): if tag.name == key: return tag return default def getall(self, key: int | str, /, default=None) -> list[TiffTag] | None: """Return list of all tags by code or name if exists, else default. Parameters: key: Code or name of tags to return. default: Value to return if no tags are found. """ result: list[TiffTag] = [] for tags in self._list: if key in tags: result.append(tags[cast(int, key)]) else: break if result: return result if not isinstance(key, str): return default for tags in self._list: for tag in tags.values(): if tag.name == key: result.append(tag) break if not result: break return result if result else default def __getitem__(self, key: int | str, /) -> TiffTag: """Return first tag by code or name. 
Raise KeyError if not found.""" if key in self._dict: return self._dict[cast(int, key)] if not isinstance(key, str): raise KeyError(key) for tag in self._dict.values(): if tag.name == key: return tag raise KeyError(key) def __setitem__(self, code: int, tag: TiffTag, /) -> None: """Add tag.""" assert tag.code == code self.add(tag) def __delitem__(self, key: int | str, /) -> None: """Delete all tags by code or name.""" found = False for tags in self._list: if key in tags: found = True del tags[cast(int, key)] else: break if found: return if not isinstance(key, str): raise KeyError(key) for tags in self._list: for tag in tags.values(): if tag.name == key: del tags[tag.code] found = True break else: break if not found: raise KeyError(key) return def __contains__(self, item: object, /) -> bool: """Return if tag is in map.""" if item in self._dict: return True if not isinstance(item, str): return False for tag in self._dict.values(): if tag.name == item: return True return False def __iter__(self) -> Iterator[TiffTag]: """Return iterator over all tags.""" return iter(self.values()) def __len__(self) -> int: """Return number of tags.""" size = 0 for d in self._list: size += len(d) return size def __repr__(self) -> str: return f'' def __str__(self) -> str: return self._str() def _str(self, detail: int = 0, width: int = 79) -> str: """Return string with information about TiffTags.""" info = [] tlines = [] vlines = [] for tag in self: value = tag._str(width=width + 1) tlines.append(value[:width].strip()) if detail > 0 and len(value) > width: try: value = tag.value except Exception: # delay load failed or closed file continue if tag.code in {273, 279, 324, 325}: if detail < 1: value = value[:256] elif len(value) > 1024: value = value[:512] + value[-512:] value = pformat(value, width=width, height=detail * 3) else: value = pformat(value, width=width, height=detail * 8) if tag.count > 1: vlines.append( f'{tag.name} {tag.dtype_name}[{tag.count}]\n{value}' ) else: 
@final
class TiffTagRegistry:
    """Registry of TIFF tag codes and names.

    Map tag codes and names to names and codes respectively.
    One tag code may be registered with several names, for example, 34853
    is used for GPSTag or OlympusSIS2.
    Different tag codes may be registered with the same name, for example,
    37387 and 41483 are both named FlashEnergy.

    Parameters:
        arg: Mapping of codes to names.

    Examples:
        >>> tags = TiffTagRegistry([(34853, 'GPSTag'), (34853, 'OlympusSIS2')])
        >>> tags.add(37387, 'FlashEnergy')
        >>> tags.add(41483, 'FlashEnergy')
        >>> tags['GPSTag']
        34853
        >>> tags[34853]
        'GPSTag'
        >>> tags.getall(34853)
        ['GPSTag', 'OlympusSIS2']
        >>> tags.getall('FlashEnergy')
        [37387, 41483]
        >>> len(tags)
        4

    """

    __slots__ = ('_dict', '_list')

    # Every dict of `_list` stores both directions of a registration:
    # code -> name and name -> code. Registrations whose code or name is
    # already taken in one dict spill over into a later dict.
    # `_dict` is the first item of `_list`.
    _dict: dict[int | str, str | int]
    _list: list[dict[int | str, str | int]]

    def __init__(
        self,
        arg: TiffTagRegistry | dict[int, str] | Sequence[tuple[int, str]],
        /,
    ) -> None:
        self._dict = {}
        self._list = [self._dict]
        self.update(arg)

    def update(
        self,
        arg: TiffTagRegistry | dict[int, str] | Sequence[tuple[int, str]],
        /,
    ) -> None:
        """Add mapping of codes to names to registry.

        Parameters:
            arg: Mapping of codes to names.

        """
        if isinstance(arg, TiffTagRegistry):
            # Copy the dicts instead of sharing them: with shared dicts,
            # deleting from one registry would also modify the other.
            # A list (not a generator) is required in case arg is self.
            self._list.extend([dict(d) for d in arg._list])
            return
        if isinstance(arg, dict):
            arg = list(arg.items())
        for code, name in arg:
            self.add(code, name)

    def add(self, code: int, name: str, /) -> None:
        """Add code and name to registry.

        The pair is stored in the first dict that contains neither the
        code nor the name. Nothing is added if a dict visited before
        that already maps code to name.

        """
        for d in self._list:
            if code in d and d[code] == name:
                break  # already registered
            if code not in d and name not in d:
                d[code] = name
                d[name] = code
                break
        else:
            # code or name is taken in all existing dicts
            self._list.append({code: name, name: code})

    def items(self) -> list[tuple[int, str]]:
        """Return all registry items as (code, name), sorted by code."""
        items = (
            i for d in self._list for i in d.items() if isinstance(i[0], int)
        )
        return sorted(items, key=lambda i: i[0])  # type: ignore

    @overload
    def get(self, key: int, /, default: None = None) -> str | None: ...

    @overload
    def get(self, key: str, /, default: None = None) -> int | None: ...

    @overload
    def get(self, key: int, /, default: str) -> str: ...

    def get(
        self, key: int | str, /, default: str | None = None
    ) -> str | int | None:
        """Return first code or name if exists, else default.

        Parameters:
            key: tag code or name to lookup.
            default: value to return if key is not found.

        """
        for d in self._list:
            if key in d:
                return d[key]
        return default

    @overload
    def getall(self, key: int, /, default: None = None) -> list[str] | None: ...

    @overload
    def getall(self, key: str, /, default: None = None) -> list[int] | None: ...

    @overload
    def getall(self, key: int, /, default: list[str]) -> list[str]: ...

    def getall(
        self, key: int | str, /, default: list[str] | None = None
    ) -> list[str] | list[int] | None:
        """Return list of all codes or names if exists, else default.

        Parameters:
            key: tag code or name to lookup.
            default: value to return if key is not found.

        """
        result = [d[key] for d in self._list if key in d]
        return result if result else default  # type: ignore

    @overload
    def __getitem__(self, key: int, /) -> str: ...

    @overload
    def __getitem__(self, key: str, /) -> int: ...

    def __getitem__(self, key: int | str, /) -> int | str:
        """Return first code or name. Raise KeyError if not found."""
        for d in self._list:
            if key in d:
                return d[key]
        raise KeyError(key)

    def __delitem__(self, key: int | str, /) -> None:
        """Delete all tags of code or name. Raise KeyError if not found."""
        found = False
        for d in self._list:
            if key in d:
                found = True
                # remove both directions of the registration
                value = d.pop(key)
                del d[value]
        if not found:
            raise KeyError(key)

    def __contains__(self, item: int | str, /) -> bool:
        """Return if code or name is in registry."""
        return any(item in d for d in self._list)

    def __iter__(self) -> Iterator[tuple[int, str]]:
        """Return iterator over all items in registry."""
        return iter(self.items())

    def __len__(self) -> int:
        """Return number of registered tags."""
        # every registration occupies two dict entries
        return sum(len(d) for d in self._list) // 2

    def __repr__(self) -> str:
        return f'<tifffile.TiffTagRegistry @0x{id(self):016X}>'

    def __str__(self) -> str:
        return 'TiffTagRegistry((\n  {}\n))'.format(
            ',\n  '.join(f'({code}, {name!r})' for code, name in self.items())
        )
kind: Nature of series, such as, 'ome' or 'imagej'. truncated: Series is truncated, for example, ImageJ hyperstack > 4 GB. multifile: Series contains pages from multiple files. squeeze: Remove length-1 dimensions (except X and Y) from shape and axes by default. transform: Function to transform image data after decoding. """ levels: list[TiffPageSeries] """Multi-resolution, pyramidal levels. ``levels[0] is self``.""" parent: TiffFile | None """TiffFile instance series belongs to.""" keyframe: TiffPage """TiffPage of series.""" dtype: numpy.dtype[Any] """Data type (native byte order) of image array in series.""" kind: str """Nature of series.""" name: str """Name of image series from metadata.""" transform: Callable[[NDArray[Any]], NDArray[Any]] | None """Function to transform image data after decoding.""" is_multifile: bool """Series contains pages from multiple files.""" is_truncated: bool """Series contains single page describing multi-dimensional image.""" _pages: list[TiffPage | TiffFrame | None] _index: int # index of series in multi-series files _squeeze: bool _axes: str _axes_squeezed: str _shape: tuple[int, ...] _shape_squeezed: tuple[int, ...] 
def __init__(
    self,
    pages: Sequence[TiffPage | TiffFrame | None],
    /,
    shape: Sequence[int] | None = None,
    dtype: DTypeLike | None = None,
    axes: str | None = None,
    *,
    attr: dict[str, Any] | None = None,
    coords: Mapping[str, NDArray[Any] | None] | None = None,
    index: int | None = None,
    parent: TiffFile | None = None,
    name: str | None = None,
    kind: str | None = None,
    truncated: bool = False,
    multifile: bool = False,
    squeeze: bool = True,
    transform: Callable[[NDArray[Any]], NDArray[Any]] | None = None,
) -> None:
    """Initialize series from pages and optional metadata.

    Shape, axes, and dtype default to those of the keyframe, which is
    taken from the first page whose keyframe belongs to an open file,
    or else from the first page with a keyframe.

    """

    def keyframes(opened: bool):
        # lazily yield keyframes of pages, optionally only of open files
        for page in self._pages:
            if page is None or page.keyframe is None:
                continue
            if opened and page.keyframe.parent.filehandle.closed:
                continue
            yield page.keyframe

    # placeholders, replaced by _set_dimensions
    self._shape = ()
    self._shape_squeezed = ()
    self._axes = ''
    self._axes_squeezed = ''

    self._attr = dict(attr) if attr is not None else {}
    self._index = int(index) if index else 0
    # might contain only first of contiguous pages
    self._pages = list(pages)
    self.levels = [self]
    npages = len(self._pages)

    try:
        keyframe = next(keyframes(True))
    except StopIteration:
        keyframe = next(keyframes(False))

    shape = keyframe.shape if shape is None else shape
    axes = keyframe.axes if axes is None else axes
    dtype = keyframe.dtype if dtype is None else dtype

    self.dtype = numpy.dtype(dtype)
    self.kind = kind or ''
    self.name = name or ''
    self.transform = transform
    self.keyframe = keyframe
    self.is_multifile = bool(multifile)
    self.is_truncated = bool(truncated)

    if parent is None:
        parent = keyframe.parent if self._pages else None
    self.parent = parent

    self._set_dimensions(shape, axes, coords, squeeze)

    self._len = npages
    if npages == 1 and not truncated:
        # a single page may stand for many contiguous pages
        framesize = product(keyframe.shape)
        if framesize > 0:
            self._len = int(product(self.shape) // framesize)
self._squeeze = bool(squeeze) self._shape = tuple(shape) self._axes = axes self._shape_squeezed, self._axes_squeezed, _ = _squeeze_axes( shape, axes ) @property def shape(self) -> tuple[int, ...]: """Shape of image array in series.""" return self._shape_squeezed if self._squeeze else self._shape @property def axes(self) -> str: """Character codes for dimensions in image array.""" return self._axes_squeezed if self._squeeze else self._axes @property def coords(self) -> dict[str, NDArray[Any]]: """Ordered map of dimension names to coordinate arrays.""" raise NotImplementedError # return { # name: numpy.arange(size) # for name, size in zip(self.dims, self.shape) # } def get_shape(self, squeeze: bool | None = None) -> tuple[int, ...]: """Return default, squeezed, or expanded shape of series. Parameters: squeeze: Remove length-1 dimensions from shape. """ if squeeze is None: squeeze = self._squeeze return self._shape_squeezed if squeeze else self._shape def get_axes(self, squeeze: bool | None = None) -> str: """Return default, squeezed, or expanded axes of series. Parameters: squeeze: Remove length-1 dimensions from axes. """ if squeeze is None: squeeze = self._squeeze return self._axes_squeezed if squeeze else self._axes def get_coords( self, squeeze: bool | None = None ) -> dict[str, NDArray[Any]]: """Return default, squeezed, or expanded coords of series. Parameters: squeeze: Remove length-1 dimensions from coords. """ raise NotImplementedError def asarray( self, *, level: int | None = None, **kwargs: Any ) -> NDArray[Any]: """Return images from series of pages as NumPy array. Parameters: level: Pyramid level to return. By default, the base layer is returned. **kwargs: Additional arguments passed to :py:meth:`TiffFile.asarray`. 
""" if self.parent is None: raise ValueError('no parent') if level is not None: return self.levels[level].asarray(**kwargs) result = self.parent.asarray(series=self, **kwargs) if self.transform is not None: result = self.transform(result) return result def aszarr( self, *, level: int | None = None, **kwargs: Any ) -> ZarrTiffStore: """Return image array from series of pages as Zarr store. Parameters: level: Pyramid level to return. By default, a multi-resolution store is returned. **kwargs: Additional arguments passed to :py:class:`ZarrTiffStore`. """ if self.parent is None: raise ValueError('no parent') return ZarrTiffStore(self, level=level, **kwargs) @cached_property def dataoffset(self) -> int | None: """Offset to contiguous image data in file.""" if not self._pages: return None pos = 0 for page in self._pages: if page is None or len(page.dataoffsets) == 0: return None if not page.is_final: return None if not pos: pos = page.dataoffsets[0] + page.nbytes continue if pos != page.dataoffsets[0]: return None pos += page.nbytes page = self._pages[0] if page is None or len(page.dataoffsets) == 0: return None offset = page.dataoffsets[0] if ( len(self._pages) == 1 and isinstance(page, TiffPage) and (page.is_imagej or page.is_shaped or page.is_stk) ): # truncated files return offset if pos == offset + product(self.shape) * self.dtype.itemsize: return offset return None @property def is_pyramidal(self) -> bool: """Series contains multiple resolutions.""" return len(self.levels) > 1 @cached_property def attr(self) -> dict[str, Any]: """Arbitrary metadata associated with series.""" return self._attr @property def ndim(self) -> int: """Number of array dimensions.""" return len(self.shape) @property def dims(self) -> tuple[str, ...]: """Names of dimensions in image array.""" # return tuple(self.coords.keys()) return tuple( unique_strings(TIFF.AXES_NAMES.get(ax, ax) for ax in self.axes) ) @property def sizes(self) -> dict[str, int]: """Ordered map of dimension names to 
lengths.""" # return dict(zip(self.coords.keys(), self.shape)) return dict(zip(self.dims, self.shape)) @cached_property def size(self) -> int: """Number of elements in array.""" return product(self.shape) @cached_property def nbytes(self) -> int: """Number of bytes in array.""" return self.size * self.dtype.itemsize @property def pages(self) -> TiffPageSeries: # sequence of TiffPages or TiffFrame in series # a workaround to keep the old interface working return self def _getitem(self, key: int, /) -> TiffPage | TiffFrame | None: """Return specified page of series from cache or file.""" key = int(key) if key < 0: key %= self._len if len(self._pages) == 1 and 0 < key < self._len: page = self._pages[0] assert page is not None assert self.parent is not None return self.parent.pages._getitem(page.index + key) return self._pages[key] @overload def __getitem__( self, key: int | numpy.integer, / ) -> TiffPage | TiffFrame | None: ... @overload def __getitem__( self, key: slice | Iterable[int], / ) -> list[TiffPage | TiffFrame | None]: ... 
class ZarrStore(collections.abc.MutableMapping):
    """Zarr store base class.

    ZarrStore instances must be closed with :py:meth:`ZarrStore.close`,
    which is automatically called when using the 'with' context manager.

    Parameters:
        fillvalue:
            Value to use for missing chunks of Zarr store.
            The default is 0.
        chunkmode:
            Specifies how to chunk data.

    References:
        1. https://zarr.readthedocs.io/en/stable/spec/v2.html
        2. https://forum.image.sc/t/multiscale-arrays-v0-1/37930

    """

    # metadata items kept in memory; other keys are delegated to
    # _contains, _getitem, and _setitem
    _store: dict[str, Any]
    _fillvalue: int | float
    _chunkmode: int

    def __init__(
        self,
        /,
        *,
        fillvalue: int | float | None = None,
        chunkmode: CHUNKMODE | int | str | None = None,
    ) -> None:
        self._store = {}
        self._fillvalue = 0 if fillvalue is None else fillvalue
        if chunkmode is None:
            self._chunkmode = CHUNKMODE(0)
        else:
            self._chunkmode = enumarg(CHUNKMODE, chunkmode)

    def __enter__(self) -> ZarrStore:
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
        self.close()

    def __del__(self) -> None:
        self.close()

    def close(self) -> None:
        """Close ZarrStore."""

    def flush(self) -> None:
        """Flush ZarrStore. Always raise PermissionError."""
        raise PermissionError('ZarrStore is read-only')

    def clear(self) -> None:
        """Clear ZarrStore. Always raise PermissionError."""
        raise PermissionError('ZarrStore is read-only')

    def keys(self) -> KeysView[str]:
        """Return keys in ZarrStore."""
        return self._store.keys()

    def items(self) -> ItemsView[str, Any]:
        """Return items in ZarrStore."""
        return self._store.items()

    def values(self) -> ValuesView[Any]:
        """Return values in ZarrStore."""
        return self._store.values()

    def __iter__(self) -> Iterator[str]:
        return iter(self._store)

    def __len__(self) -> int:
        return len(self._store)

    def __contains__(self, key: object, /) -> bool:
        if key in self._store:
            return True
        if not isinstance(key, str):
            # only strings can be keys of chunks; do not rely on assert,
            # which is stripped when running with -O
            return False
        return self._contains(key)

    def _contains(self, key: str, /) -> bool:
        """Return if key is in store."""
        raise NotImplementedError

    def __delitem__(self, key: object, /) -> None:
        raise PermissionError('ZarrStore is read-only')

    def __getitem__(self, key: str, /) -> Any:
        if key in self._store:
            return self._store[key]
        if key.endswith(('.zarray', '.zgroup')):
            # catch '.zarray' and 'attribute/.zarray'
            raise KeyError(key)
        return self._getitem(key)

    def _getitem(self, key: str, /) -> NDArray[Any]:
        """Return chunk from file."""
        raise NotImplementedError

    def __setitem__(self, key: str, value: bytes, /) -> None:
        if key in self._store:
            # metadata items cannot be overwritten
            raise KeyError(key)
        if key.endswith(('.zarray', '.zgroup')):
            # catch '.zarray' and 'attribute/.zarray'
            raise KeyError(key)
        return self._setitem(key, value)

    def _setitem(self, key: str, value: bytes, /) -> None:
        """Write chunk to file."""
        raise NotImplementedError

    @property
    def is_multiscales(self) -> bool:
        """Return if ZarrStore is multi-scales."""
        return b'multiscales' in self._store['.zattrs']

    @staticmethod
    def _empty_chunk(
        shape: tuple[int, ...],
        dtype: DTypeLike,
        fillvalue: int | float | None,
        /,
    ) -> NDArray[Any]:
        """Return array of shape and dtype filled with fillvalue."""
        if fillvalue is None or fillvalue == 0:
            # return bytes(product(shape) * dtype.itemsize)
            return numpy.zeros(shape, dtype)
        chunk = numpy.empty(shape, dtype)
        chunk[:] = fillvalue
        return chunk  # .tobytes()

    @staticmethod
    def _dtype_str(dtype: numpy.dtype[Any], /) -> str:
        """Return dtype as string with native byte order."""
        if dtype.itemsize == 1:
            byteorder = '|'
        else:
            byteorder = {'big': '>', 'little': '<'}[sys.byteorder]
        return byteorder + dtype.str[1:]

    @staticmethod
    def _json(obj: Any, /) -> bytes:
        """Serialize object to JSON formatted, ASCII encoded bytes."""
        return json.dumps(
            obj,
            indent=1,
            sort_keys=True,
            ensure_ascii=True,
            separators=(',', ': '),
        ).encode('ascii')

    @staticmethod
    def _value(value: Any, dtype: numpy.dtype[Any], /) -> Any:
        """Return value which is serializable to JSON.

        Non-finite floating-point values are returned as the strings
        'NaN', 'Infinity', or '-Infinity'. Complex values are returned
        as tuple of converted real and imaginary parts.

        """
        if value is None:
            return value
        if dtype.kind == 'b':
            return bool(value)
        if dtype.kind in 'ui':
            return int(value)
        if dtype.kind == 'f':
            if numpy.isnan(value):
                return 'NaN'
            if numpy.isposinf(value):
                return 'Infinity'
            if numpy.isneginf(value):
                return '-Infinity'
            return float(value)
        if dtype.kind in 'c':
            value = numpy.array(value, dtype)
            return (
                ZarrStore._value(value.real, dtype.type().real.dtype),
                ZarrStore._value(value.imag, dtype.type().imag.dtype),
            )
        return value

    @staticmethod
    def _ndindex(
        shape: tuple[int, ...], chunks: tuple[int, ...], /
    ) -> Iterator[str]:
        """Return iterator over all chunk index strings."""
        assert len(shape) == len(chunks)
        # number of chunks along each dimension, rounded up
        chunked = tuple(
            i // j + (1 if i % j else 0) for i, j in zip(shape, chunks)
        )
        for indices in numpy.ndindex(chunked):
            yield '.'.join(str(index) for index in indices)
""" _data: list[TiffPageSeries] _filecache: FileCache _transform: Callable[[NDArray[Any]], NDArray[Any]] | None _maxworkers: int | None _buffersize: int | None _squeeze: bool | None _writable: bool _multiscales: bool def __init__( self, arg: TiffPage | TiffFrame | TiffPageSeries, /, *, level: int | None = None, chunkmode: CHUNKMODE | int | str | None = None, fillvalue: int | float | None = None, zattrs: dict[str, Any] | None = None, multiscales: bool | None = None, lock: threading.RLock | NullContext | None = None, squeeze: bool | None = None, maxworkers: int | None = None, buffersize: int | None = None, _openfiles: int | None = None, ) -> None: super().__init__(fillvalue=fillvalue, chunkmode=chunkmode) if self._chunkmode not in {0, 2}: raise NotImplementedError(f'{self._chunkmode!r} not implemented') self._squeeze = None if squeeze is None else bool(squeeze) self._maxworkers = maxworkers self._buffersize = buffersize if isinstance(arg, TiffPageSeries): self._data = arg.levels self._transform = arg.transform if multiscales is not None and not multiscales: level = 0 if level is not None: self._data = [self._data[level]] name = arg.name else: self._data = [TiffPageSeries([arg])] self._transform = None name = 'Unnamed' fh = self._data[0].keyframe.parent._parent.filehandle self._writable = fh.writable() and self._chunkmode == 0 if lock is None: fh.set_lock(True) lock = fh.lock self._filecache = FileCache(size=_openfiles, lock=lock) zattrs = {} if zattrs is None else dict(zattrs) # TODO: Zarr Encoding Specification # https://xarray.pydata.org/en/stable/internals/zarr-encoding-spec.html if multiscales or len(self._data) > 1: # multiscales self._multiscales = True if '_ARRAY_DIMENSIONS' in zattrs: array_dimensions = zattrs.pop('_ARRAY_DIMENSIONS') else: array_dimensions = list(self._data[0].get_axes(squeeze)) self._store['.zgroup'] = ZarrStore._json({'zarr_format': 2}) self._store['.zattrs'] = ZarrStore._json( { # TODO: use https://ngff.openmicroscopy.org/latest/ 
def close(self) -> None:
    """Clear cache of open file handles, if it exists."""
    try:
        filecache = self._filecache
    except AttributeError:
        # instance was not fully initialized
        return
    filecache.clear()
write_fsspec( self, jsonfile: str | os.PathLike[Any] | TextIO, /, url: str, *, groupname: str | None = None, templatename: str | None = None, compressors: dict[COMPRESSION | int, str | None] | None = None, version: int | None = None, _shape: Sequence[int] | None = None, _axes: Sequence[str] | None = None, _index: Sequence[int] | None = None, _append: bool = False, _close: bool = True, ) -> None: """Write fsspec ReferenceFileSystem as JSON to file. Parameters: jsonfile: Name or open file handle of output JSON file. url: Remote location of TIFF file(s) without file name(s). groupname: Zarr group name. templatename: Version 1 URL template name. The default is 'u'. compressors: Mapping of :py:class:`COMPRESSION` codes to Numcodecs codec names. version: Version of fsspec file to write. The default is 0. _shape: Shape of file sequence (experimental). _axes: Axes of file sequence (experimental). _index Index of file in sequence (experimental). _append: If *True*, only write index keys and values (experimental). _close: If *True*, no more appends (experimental). Raises: ValueError: ZarrTiffStore cannot be represented as ReferenceFileSystem due to features that are not supported by Zarr, Numcodecs, or Imagecodecs: - compressors, such as CCITT - filters, such as bitorder reversal, packed integers - dtypes, such as float24, complex integers - JPEGTables in multi-page series - incomplete chunks, such as `imagelength % rowsperstrip != 0` Files containing incomplete tiles may fail at runtime. Notes: Parameters `_shape`, `_axes`, `_index`, `_append`, and `_close` are an experimental API for joining the ReferenceFileSystems of multiple files of a TiffSequence. 
References: - `fsspec ReferenceFileSystem format `_ """ compressors = { 1: None, 8: 'zlib', 32946: 'zlib', 34925: 'lzma', 50013: 'zlib', # pixtiff 5: 'imagecodecs_lzw', 7: 'imagecodecs_jpeg', 22610: 'imagecodecs_jpegxr', 32773: 'imagecodecs_packbits', 33003: 'imagecodecs_jpeg2k', 33004: 'imagecodecs_jpeg2k', 33005: 'imagecodecs_jpeg2k', 33007: 'imagecodecs_jpeg', 34712: 'imagecodecs_jpeg2k', 34887: 'imagecodecs_lerc', 34892: 'imagecodecs_jpeg', 34933: 'imagecodecs_png', 34934: 'imagecodecs_jpegxr', 48124: 'imagecodecs_jetraw', 50000: 'imagecodecs_zstd', # numcodecs.zstd fails w/ unknown sizes 50001: 'imagecodecs_webp', 50002: 'imagecodecs_jpegxl', 52546: 'imagecodecs_jpegxl', **({} if compressors is None else compressors), } for series in self._data: errormsg = ' not supported by the fsspec ReferenceFileSystem' keyframe = series.keyframe if ( keyframe.compression in {65000, 65001, 65002} and keyframe.parent.is_eer ): compressors[keyframe.compression] = 'imagecodecs_eer' if keyframe.compression not in compressors: raise ValueError(f'{keyframe.compression!r} is' + errormsg) if keyframe.fillorder != 1: raise ValueError(f'{keyframe.fillorder!r} is' + errormsg) if keyframe.sampleformat not in {1, 2, 3, 6}: # TODO: support float24 and cint via filters? 
raise ValueError(f'{keyframe.sampleformat!r} is' + errormsg) if ( keyframe.bitspersample not in { 8, 16, 32, 64, 128, } and keyframe.compression not in { # JPEG 7, 33007, 34892, } and compressors[keyframe.compression] != 'imagecodecs_eer' ): raise ValueError( f'BitsPerSample {keyframe.bitspersample} is' + errormsg ) if ( not self._chunkmode and not keyframe.is_tiled and keyframe.imagelength % keyframe.rowsperstrip ): raise ValueError('incomplete chunks are' + errormsg) if self._chunkmode and not keyframe.is_final: raise ValueError(f'{self._chunkmode!r} is' + errormsg) if keyframe.jpegtables is not None and len(series.pages) > 1: raise ValueError( 'JPEGTables in multi-page files are' + errormsg ) if url is None: url = '' elif url and url[-1] != '/': url += '/' url = url.replace('\\', '/') if groupname is None: groupname = '' elif groupname and groupname[-1] != '/': groupname += '/' byteorder: ByteOrder | None = '<' if sys.byteorder == 'big' else '>' if ( self._data[0].keyframe.parent.byteorder != byteorder or self._data[0].keyframe.dtype is None or self._data[0].keyframe.dtype.itemsize == 1 ): byteorder = None index: str _shape = [] if _shape is None else list(_shape) _axes = [] if _axes is None else list(_axes) if len(_shape) != len(_axes): raise ValueError('len(_shape) != len(_axes)') if _index is None: index = '' elif len(_shape) != len(_index): raise ValueError('len(_shape) != len(_index)') elif _index: index = '.'.join(str(i) for i in _index) index += '.' 
refs: dict[str, Any] = {} refzarr: dict[str, Any] if version == 1: if _append: raise ValueError('cannot append to version 1') if templatename is None: templatename = 'u' refs['version'] = 1 refs['templates'] = {} refs['gen'] = [] templates = {} if self._data[0].is_multifile: i = 0 for page in self._data[0].pages: if page is None or page.keyframe is None: continue fname = page.keyframe.parent.filehandle.name if fname in templates: continue key = f'{templatename}{i}' templates[fname] = '{{%s}}' % key refs['templates'][key] = url + fname i += 1 else: fname = self._data[0].keyframe.parent.filehandle.name key = f'{templatename}' templates[fname] = '{{%s}}' % key refs['templates'][key] = url + fname refs['refs'] = refzarr = {} else: refzarr = refs if not _append: if groupname: # TODO: support nested groups refzarr['.zgroup'] = ZarrStore._json( {'zarr_format': 2} ).decode() for key, value in self._store.items(): if '.zattrs' in key and _axes: value = json.loads(value) if '_ARRAY_DIMENSIONS' in value: value['_ARRAY_DIMENSIONS'] = ( _axes + value['_ARRAY_DIMENSIONS'] ) value = ZarrStore._json(value) elif '.zarray' in key: level = int(key.split('/')[0]) if '/' in key else 0 keyframe = self._data[level].keyframe value = json.loads(value) if _shape: value['shape'] = _shape + value['shape'] value['chunks'] = [1] * len(_shape) + value['chunks'] codec_id = compressors[keyframe.compression] if codec_id == 'imagecodecs_jpeg': # TODO: handle JPEG color spaces jpegtables = keyframe.jpegtables if jpegtables is None: tables = None else: import base64 tables = base64.b64encode(jpegtables).decode() jpegheader = keyframe.jpegheader if jpegheader is None: header = None else: import base64 header = base64.b64encode(jpegheader).decode() ( colorspace_jpeg, colorspace_data, ) = jpeg_decode_colorspace( keyframe.photometric, keyframe.planarconfig, keyframe.extrasamples, keyframe.is_jfif, ) value['compressor'] = { 'id': codec_id, 'tables': tables, 'header': header, 'bitspersample': 
keyframe.bitspersample, 'colorspace_jpeg': colorspace_jpeg, 'colorspace_data': colorspace_data, } elif ( codec_id == 'imagecodecs_webp' and keyframe.samplesperpixel == 4 ): value['compressor'] = { 'id': codec_id, 'hasalpha': True, } elif codec_id == 'imagecodecs_eer': if keyframe.compression == 65002: rlebits = int(keyframe.tags.valueof(65007, 7)) horzbits = int(keyframe.tags.valueof(65008, 2)) vertbits = int(keyframe.tags.valueof(65009, 2)) elif keyframe.compression == 65001: rlebits = 7 horzbits = 2 vertbits = 2 else: rlebits = 8 horzbits = 2 vertbits = 2 value['compressor'] = { 'id': codec_id, 'shape': keyframe.chunks, 'rlebits': rlebits, 'horzbits': horzbits, 'vertbits': vertbits, } elif codec_id is not None: value['compressor'] = {'id': codec_id} if byteorder is not None: value['dtype'] = byteorder + value['dtype'][1:] if keyframe.predictor > 1: # predictors need access to chunk shape and dtype # requires imagecodecs > 2021.8.26 to read if keyframe.predictor in {2, 34892, 34893}: filter_id = 'imagecodecs_delta' else: filter_id = 'imagecodecs_floatpred' if keyframe.predictor <= 3: dist = 1 elif keyframe.predictor in {34892, 34894}: dist = 2 else: dist = 4 if ( keyframe.planarconfig == 1 and keyframe.samplesperpixel > 1 ): axis = -2 else: axis = -1 value['filters'] = [ { 'id': filter_id, 'axis': axis, 'dist': dist, 'shape': value['chunks'], 'dtype': value['dtype'], } ] value = ZarrStore._json(value) refzarr[groupname + key] = value.decode() fh: TextIO if hasattr(jsonfile, 'write'): fh = jsonfile # type: ignore else: fh = open(jsonfile, 'w', encoding='utf-8') if version == 1: fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"') indent = ' ' elif _append: indent = ' ' else: fh.write(json.dumps(refs, indent=1)[:-2]) indent = ' ' for key, value in self._store.items(): if '.zarray' in key: value = json.loads(value) shape = value['shape'] chunks = value['chunks'] levelstr = (key.split('/')[0] + '/') if '/' in key else '' for chunkindex in 
def _getitem(self, key: str, /) -> NDArray[Any]:
    """Return decoded chunk from file.

    Parameters:
        key:
            Zarr chunk key. It is resolved to a keyframe, page, chunk
            index, file offset, and byte count by `_parse_key`.

    Returns:
        Decoded chunk, passed through `_transform` if one is set.

    Raises:
        KeyError:
            Key resolves to no page, or to a zero offset or byte count.
        RuntimeError:
            Size of decoded chunk does not match the expected chunk shape.

    """
    keyframe, page, chunkindex, offset, bytecount = self._parse_key(key)

    if page is None or offset == 0 or bytecount == 0:
        raise KeyError(key)

    fh = page.parent.filehandle

    if self._chunkmode and offset is None:
        # the chunk is a whole page: let the page read and decode itself
        self._filecache.open(fh)
        try:
            chunk = page.asarray(
                lock=self._filecache.lock,
                maxworkers=self._maxworkers,
                buffersize=self._buffersize,
            )
        finally:
            # release the handle acquired by open() even if reading fails
            self._filecache.close(fh)
        if self._transform is not None:
            chunk = self._transform(chunk)
        return chunk

    assert offset is not None and bytecount is not None
    chunk_bytes = self._filecache.read(fh, offset, bytecount)

    decodeargs: dict[str, Any] = {'_fullsize': True}
    if page.jpegtables is not None:
        decodeargs['jpegtables'] = page.jpegtables
    if keyframe.jpegheader is not None:
        decodeargs['jpegheader'] = keyframe.jpegheader

    assert chunkindex is not None
    # decode returns a sequence; its first item is the decoded array
    chunk = keyframe.decode(
        chunk_bytes, chunkindex, **decodeargs  # type: ignore
    )[0]

    assert chunk is not None
    if self._transform is not None:
        chunk = self._transform(chunk)

    # in chunk mode a chunk covers the whole keyframe, else one strip/tile
    if self._chunkmode:
        chunks = keyframe.shape
    else:
        chunks = keyframe.chunks
    if chunk.size != product(chunks):
        raise RuntimeError(f'{chunk.size} != {product(chunks)}')
    return chunk  # .tobytes()
def _indices(self, key: str, series: TiffPageSeries, /) -> tuple[int, int]:
    """Return page and strile indices from Zarr chunk index.

    Parameters:
        key:
            Dot-separated integer chunk index, one integer per dimension
            of ``series.get_shape(self._squeeze)``.
        series:
            Series the chunk index refers to.

    Returns:
        Flat index of the frame (page) in the series and flat index of
        the strip or tile within that frame.

    Raises:
        KeyError:
            Key is not a dot-separated sequence of integers or has the
            wrong number of dimensions.
        RuntimeError:
            Series shape cannot be matched to the keyframe shape.

    """
    keyframe = series.keyframe
    shape = series.get_shape(self._squeeze)
    try:
        indices = [int(i) for i in key.split('.')]
    except ValueError as exc:
        raise KeyError(key) from exc
    if len(indices) != len(shape):
        # malformed key: report like any other invalid key instead of
        # relying on an assert, which is removed under ``python -O``
        raise KeyError(key)
    if self._chunkmode:
        chunked = (1,) * len(keyframe.shape)
    else:
        chunked = keyframe.chunked
    # find the split between frame dimensions and dimensions within a
    # frame: the trailing dimensions whose product equals the frame size
    p = 1
    for i, s in enumerate(shape[::-1]):
        p *= s
        if p == keyframe.size:
            i = len(indices) - i - 1
            frames_indices = indices[:i]
            strile_indices = indices[i:]
            frames_chunked = shape[:i]
            strile_chunked = list(shape[i:])  # updated later
            break
    else:
        raise RuntimeError
    if len(strile_chunked) == len(keyframe.shape):
        strile_chunked = list(chunked)
    else:
        # get strile_chunked including singleton dimensions
        i = len(strile_indices) - 1
        j = len(keyframe.shape) - 1
        while True:
            if strile_chunked[i] == keyframe.shape[j]:
                strile_chunked[i] = chunked[j]
                i -= 1
                j -= 1
            elif strile_chunked[i] == 1:
                i -= 1
            else:
                raise RuntimeError('shape does not match page shape')
            if i < 0 or j < 0:
                break
        assert product(strile_chunked) == product(chunked)
    if len(frames_indices) > 0:
        frameindex = int(
            numpy.ravel_multi_index(frames_indices, frames_chunked)
        )
    else:
        frameindex = 0
    if len(strile_indices) > 0:
        strileindex = int(
            numpy.ravel_multi_index(strile_indices, strile_chunked)
        )
    else:
        strileindex = 0
    return frameindex, strileindex
@staticmethod
def _is_writable(keyframe: TiffPage) -> bool:
    """Return True if chunks are writable.

    All of these must hold for the keyframe: compression is 1, fill order
    is 1, sample format is one of 1, 2, 3, or 6, and bits per sample is
    one of 8, 16, 32, 64, or 128.

    """
    # checks run in this order and stop at the first one that fails
    if keyframe.compression != 1:
        return False
    if keyframe.fillorder != 1:
        return False
    if keyframe.sampleformat not in {1, 2, 3, 6}:
        return False
    # a check that rowsperstrip divides imagelength is intentionally
    # not applied here
    return keyframe.bitspersample in {8, 16, 32, 64, 128}
def __init__(
    self,
    filesequence: FileSequence,
    /,
    *,
    fillvalue: int | float | None = None,
    chunkmode: CHUNKMODE | int | str | None = None,
    chunkshape: Sequence[int] | None = None,
    chunkdtype: DTypeLike | None = None,
    dtype: DTypeLike | None = None,  # deprecated
    axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
    zattrs: dict[str, Any] | None = None,
    imreadargs: dict[str, Any] | None = None,
    **kwargs: Any,
) -> None:
    # Set up a Zarr store that maps one chunk to one file of the sequence.
    super().__init__(fillvalue=fillvalue, chunkmode=chunkmode)

    # validate arguments; the order determines which error is reported
    if self._chunkmode not in {0, 3}:
        raise ValueError(f'invalid chunkmode {self._chunkmode!r}')
    if not isinstance(filesequence, FileSequence):
        raise TypeError('not a FileSequence')
    if filesequence._container:
        raise NotImplementedError('cannot open container as Zarr store')

    # TODO: deprecate kwargs?
    # entries of imreadargs take precedence over same-named kwargs
    readargs = kwargs if imreadargs is None else kwargs | imreadargs

    self._kwargs = readargs
    self._imread = filesequence.imread
    self._commonpath = filesequence.commonpath()

    if dtype is not None:
        warnings.warn(
            ' '
            'the dtype argument is deprecated since 2024.2.12. '
            'Use chunkdtype',
            DeprecationWarning,
            stacklevel=2,
        )
        chunkdtype = dtype
    del dtype

    if chunkshape is not None and chunkdtype is not None:
        self._chunks = tuple(chunkshape)
        self._dtype = numpy.dtype(chunkdtype)
    else:
        # shape or dtype unknown: take both from the first file
        first = filesequence.imread(filesequence[0], **readargs)
        self._chunks = first.shape
        self._dtype = first.dtype
        del first

    self._tiled = TiledSequence(
        filesequence.shape, self._chunks, axestiled=axestiled
    )
    # map chunk index in the tiled array to the file providing the chunk
    chunk_indices = self._tiled.indices(filesequence.indices)
    self._lookup = dict(zip(chunk_indices, filesequence))

    attributes = {} if zattrs is None else dict(zattrs)
    # TODO: add _ARRAY_DIMENSIONS to ZarrFileSequenceStore
    # if '_ARRAY_DIMENSIONS' not in zattrs:
    #     zattrs['_ARRAY_DIMENSIONS'] = list(...)
    self._store['.zattrs'] = ZarrStore._json(attributes)

    zarray = {
        'zarr_format': 2,
        'shape': self._tiled.shape,
        'chunks': self._tiled.chunks,
        'dtype': ZarrStore._dtype_str(self._dtype),
        'compressor': None,
        'fill_value': ZarrStore._value(fillvalue, self._dtype),
        'order': 'C',
        'filters': None,
    }
    self._store['.zarray'] = ZarrStore._json(zarray)
def _getitem(self, key: str, /) -> NDArray[Any]:
    """Return chunk from file.

    Parameters:
        key:
            Dot-separated integer chunk index.

    Returns:
        Array returned by the imread function for the file mapped to the
        chunk index, read with the stored imread arguments.

    Raises:
        KeyError:
            Key is not a dot-separated sequence of integers, or no file
            is mapped to the chunk index.

    """
    try:
        indices = tuple(int(i) for i in key.split('.'))
    except ValueError as exc:
        # report malformed keys as missing instead of leaking ValueError
        raise KeyError(key) from exc
    filename = self._lookup.get(indices, None)
    if filename is None:
        raise KeyError(key)
    return self._imread(filename, **self._kwargs)
def write_fsspec(
    self,
    jsonfile: str | os.PathLike[Any] | TextIO,
    /,
    url: str,
    *,
    quote: bool | None = None,
    groupname: str | None = None,
    templatename: str | None = None,
    codec_id: str | None = None,
    version: int | None = None,
    _append: bool = False,
    _close: bool = True,
) -> None:
    """Write fsspec ReferenceFileSystem as JSON to file.

    Parameters:
        jsonfile:
            Name or open file handle of output JSON file.
            A file opened by this method is always closed on return,
            also if writing fails. A passed-in handle is left open.
        url:
            Remote location of TIFF file(s) without file name(s).
        quote:
            Quote file names, that is, replace ' ' with '%20'.
            The default is True.
        groupname:
            Zarr group name.
        templatename:
            Version 1 URL template name. The default is 'u'.
        codec_id:
            Name of Numcodecs codec to decode files or chunks.
        version:
            Version of fsspec file to write. The default is 0.
        _append, _close:
            Experimental API.

    Raises:
        ValueError:
            The codec cannot be determined from the imread function,
            or appending to a version 1 file is requested.

    References:
        - fsspec ReferenceFileSystem format

    """
    from urllib.parse import quote as quote_

    kwargs = self._kwargs.copy()

    if codec_id is not None:
        pass
    elif self._imread == imread:
        codec_id = 'tifffile'
    elif 'imagecodecs.' in self._imread.__module__:
        if (
            self._imread.__name__ != 'imread'
            or 'codec' not in self._kwargs
        ):
            raise ValueError('cannot determine codec_id')
        # the codec argument selects the Numcodecs codec; it is not
        # passed on as a codec parameter
        codec = kwargs.pop('codec')
        if isinstance(codec, (list, tuple)):
            codec = codec[0]
        if callable(codec):
            codec = codec.__name__.split('_')[0]
        codec_id = {
            'apng': 'imagecodecs_apng',
            'avif': 'imagecodecs_avif',
            'gif': 'imagecodecs_gif',
            'heif': 'imagecodecs_heif',
            'jpeg': 'imagecodecs_jpeg',
            'jpeg8': 'imagecodecs_jpeg',
            'jpeg12': 'imagecodecs_jpeg',
            'jpeg2k': 'imagecodecs_jpeg2k',
            'jpegls': 'imagecodecs_jpegls',
            'jpegxl': 'imagecodecs_jpegxl',
            'jpegxr': 'imagecodecs_jpegxr',
            'ljpeg': 'imagecodecs_ljpeg',
            'lerc': 'imagecodecs_lerc',
            # 'npy': 'imagecodecs_npy',
            'png': 'imagecodecs_png',
            'qoi': 'imagecodecs_qoi',
            'tiff': 'imagecodecs_tiff',
            'webp': 'imagecodecs_webp',
            'zfp': 'imagecodecs_zfp',
        }[codec]
    else:
        # TODO: choose codec from filename
        raise ValueError('cannot determine codec_id')

    if url is None:
        url = ''
    elif url and url[-1] != '/':
        url += '/'

    if groupname is None:
        groupname = ''
    elif groupname and groupname[-1] != '/':
        groupname += '/'

    refs: dict[str, Any] = {}
    refzarr: dict[str, Any]
    if version == 1:
        if _append:
            raise ValueError('cannot append to version 1 files')
        if templatename is None:
            templatename = 'u'
        refs['version'] = 1
        refs['templates'] = {templatename: url}
        refs['gen'] = []
        refs['refs'] = refzarr = {}
        # chunk references use the template instead of the literal URL
        url = '{{%s}}' % templatename
    else:
        refzarr = refs

    if groupname and not _append:
        refzarr['.zgroup'] = ZarrStore._json({'zarr_format': 2}).decode()

    for key, value in self._store.items():
        if '.zarray' in key:
            value = json.loads(value)
            # TODO: make kwargs serializable
            value['compressor'] = {'id': codec_id, **kwargs}
            value = ZarrStore._json(value)
        refzarr[groupname + key] = value.decode()

    # only a file opened here is closed here
    opened_here = not hasattr(jsonfile, 'write')
    fh: TextIO
    if opened_here:
        fh = open(jsonfile, 'w', encoding='utf-8')
    else:
        fh = jsonfile  # type: ignore

    try:
        # write the metadata but leave the JSON object open so that the
        # chunk references can be streamed after it
        if version == 1:
            fh.write(json.dumps(refs, indent=1).rsplit('}"', 1)[0] + '}"')
            indent = ' '
        elif _append:
            fh.write(',\n')
            fh.write(json.dumps(refs, indent=1)[2:-2])
            indent = ' '
        else:
            fh.write(json.dumps(refs, indent=1)[:-2])
            indent = ' '

        prefix = len(self._commonpath)

        for key in self._store:
            if '.zarray' not in key:
                continue
            for index, filename in sorted(
                self._lookup.items(), key=lambda x: x[0]
            ):
                # file names are written relative to the common path
                filename = filename[prefix:].replace('\\', '/')
                if quote is None or quote:
                    filename = quote_(filename)
                if filename.startswith('/'):
                    filename = filename[1:]
                indexstr = '.'.join(str(i) for i in index)
                fh.write(
                    f',\n{indent}"{groupname}{indexstr}": '
                    f'["{url}{filename}"]'
                )

        if version == 1:
            fh.write('\n }\n}')
        elif _close:
            fh.write('\n}')
    finally:
        if opened_here:
            fh.close()
def __init__(
    self,
    imread: Callable[..., NDArray[Any]],
    files: (
        str | os.PathLike[Any] | Sequence[str | os.PathLike[Any]] | None
    ),
    *,
    container: str | os.PathLike[Any] | None = None,
    sort: Callable[..., Any] | bool | None = None,
    parse: Callable[..., Any] | None = None,
    **kwargs: Any,
) -> None:
    # Collect, sort, and parse the file names and store the results.
    if files is None:
        files = '*'

    # select the sort function: a callable is used as is, None or another
    # true value selects natural_sorted, any other value disables sorting
    sort_func: Callable[..., list[str]] | None
    if callable(sort):
        sort_func = sort
    elif sort is None or sort:
        sort_func = natural_sorted
    else:
        sort_func = None

    self._container = container
    if container is None:
        if isinstance(files, os.PathLike):
            # single path: sorted only if sort was given explicitly
            files = [os.fspath(files)]
            if sort is not None and sort_func is not None:
                files = sort_func(files)
        elif isinstance(files, str):
            # glob pattern on the file system
            files = glob.glob(files)
            if sort_func is not None:
                files = sort_func(files)
    else:
        import fnmatch

        if isinstance(container, (str, os.PathLike)):
            import zipfile

            self._container = zipfile.ZipFile(container)
        elif not hasattr(self._container, 'open'):
            raise ValueError('invalid container')
        if isinstance(files, str):
            # pattern is matched against the names in the container
            names = self._container.namelist()
            files = fnmatch.filter(names, files)
            if sort_func is not None:
                files = sort_func(files)

    files = [os.fspath(f) for f in files]  # type: ignore
    if not files:
        raise ValueError('no files found')

    if not callable(imread):
        raise ValueError('invalid imread function')

    if container:
        # redefine imread to read from container
        def imread_(fname: str, _imread=imread, **readargs) -> NDArray[Any]:
            with self._container.open(fname) as member:
                with io.BytesIO(member.read()) as stream:
                    return _imread(stream, **readargs)

        imread = imread_

    if parse is None and kwargs.get('pattern', None):
        parse = parse_filenames

    if parse:
        try:
            dims, shape, indices, files = parse(files, **kwargs)
        except ValueError as exc:
            raise ValueError('failed to parse file names') from exc
    else:
        # unparsed: a one-dimensional sequence in file order
        count = len(files)
        dims = ('sequence',)
        shape = (count,)
        indices = tuple((i,) for i in range(count))

    assert isinstance(files, list) and isinstance(files[0], str)

    # one character code per dimension: a known code, else the
    # upper-cased first letter of the dimension name
    codes = TIFF.AXES_CODES
    axes = ''.join(codes.get(dim.lower(), dim[0].upper()) for dim in dims)

    self._files = files
    self.imread = imread
    self.axes = axes
    self.dims = tuple(dims)
    self.shape = tuple(shape)
    self.indices = indices
def asarray(
    self,
    *,
    imreadargs: dict[str, Any] | None = None,
    chunkshape: tuple[int, ...] | None = None,
    chunkdtype: DTypeLike | None = None,
    dtype: DTypeLike | None = None,  # deprecated
    axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
    out_inplace: bool | None = None,
    ioworkers: int | None = 1,
    out: OutputType = None,
    **kwargs: Any,
) -> NDArray[Any]:
    """Return images from files as NumPy array.

    Parameters:
        imreadargs:
            Arguments passed to :py:attr:`FileSequence.imread`.
        chunkshape:
            Shape of chunk in each file.
            Must match ``FileSequence.imread(file, **imreadargs).shape``.
            By default, this is determined by reading the first file.
        chunkdtype:
            Data type of chunk in each file.
            Must match ``FileSequence.imread(file, **imreadargs).dtype``.
            By default, this is determined by reading the first file.
        dtype:
            Deprecated. Use `chunkdtype`.
        axestiled:
            Axes to be tiled.
            Map stacked sequence axis to chunk axis.
        ioworkers:
            Maximum number of threads to execute
            :py:attr:`FileSequence.imread` asynchronously.
            If *0*, use up to :py:attr:`_TIFF.MAXIOWORKERS` threads.
            Using threads can significantly improve runtime when reading
            many small files from a network share.
        out_inplace:
            :py:attr:`FileSequence.imread` decodes directly to the output
            instead of returning an array, which is copied to the output.
            Not all imread functions support this, especially in
            non-contiguous cases.
        out:
            Specifies how image array is returned.
            By default, create a new array.
            If a *numpy.ndarray*, a writable array to which the images
            are copied.
            If *'memmap'*, create a memory-mapped array in a temporary
            file.
            If a *string* or *open file*, the file used to create a
            memory-mapped array.
        **kwargs:
            Arguments passed to :py:attr:`FileSequence.imread` in
            addition to `imreadargs`.

    Raises:
        IndexError, ValueError:
            Array shapes do not match.

    """
    # TODO: deprecate kwargs?
    files = self._files
    if imreadargs is not None:
        kwargs |= imreadargs

    if ioworkers is None or ioworkers < 1:
        ioworkers = TIFF.MAXIOWORKERS
    ioworkers = min(len(files), ioworkers)
    assert isinstance(ioworkers, int)  # mypy bug?

    if out_inplace is None and self.imread == imread:
        out_inplace = True
    else:
        out_inplace = bool(out_inplace)

    if dtype is not None:
        warnings.warn(
            ' '
            'the dtype argument is deprecated since 2024.2.12. '
            'Use chunkdtype',
            DeprecationWarning,
            stacklevel=2,
        )
        chunkdtype = dtype
    del dtype

    if chunkshape is None or chunkdtype is None:
        # determine chunk shape and dtype from the first file
        im = self.imread(files[0], **kwargs)
        chunkshape = im.shape
        chunkdtype = im.dtype
        del im
    chunkdtype = numpy.dtype(chunkdtype)

    # shape to restore after filling the flattened, untiled output
    shape: tuple[int, ...] | None = None
    positions: Iterable[Any]

    if axestiled:
        tiled = TiledSequence(self.shape, chunkshape, axestiled=axestiled)
        result = create_output(out, tiled.shape, chunkdtype)
        positions = tiled.slices(self.indices)

        def func(index: tuple[int | slice, ...], fname: str) -> None:
            # read single image from file into result
            if index is None:
                # no position for this file; indexing result with None
                # would insert a new axis instead of selecting a tile
                return
            if out_inplace:
                self.imread(fname, out=result[index], **kwargs)
            else:
                im = self.imread(fname, **kwargs)
                result[index] = im
                del im  # delete memory-mapped file

    else:
        shape = self.shape + chunkshape
        result = create_output(out, shape, chunkdtype)
        # address chunks by their flat position in the sequence
        result = result.reshape(-1, *chunkshape)
        positions = self.indices

        def func(index: tuple[int | slice, ...], fname: str) -> None:
            # read single image from file into result
            if index is None:
                return
            index_ = int(
                numpy.ravel_multi_index(index, self.shape)  # type: ignore
            )
            if out_inplace:
                self.imread(fname, out=result[index_], **kwargs)
            else:
                im = self.imread(fname, **kwargs)
                result[index_] = im
                del im  # delete memory-mapped file

    if ioworkers < 2:
        for index, fname in zip(positions, files):
            func(index, fname)
    else:
        with ThreadPoolExecutor(ioworkers) as executor:
            # consume the iterator so that worker exceptions are raised
            for _ in executor.map(func, positions, files):
                pass

    if shape is not None:
        result.shape = shape
    return result
def commonpath(self) -> str:
    """Return longest common sub-path of each file in sequence.

    For a sequence of one file, this is the directory part of the file
    name.

    """
    names = self._files
    if len(names) != 1:
        return os.path.commonpath(names)
    return os.path.dirname(names[0])
@final
class TiffSequence(FileSequence):
    r"""Sequence of TIFF files containing compatible array data.

    Same as :py:class:`FileSequence` with the :py:func:`imread` function,
    `'\*.tif'` glob pattern, and `out_inplace` enabled by default.

    """

    def __init__(
        self,
        files: (
            str | os.PathLike[Any] | Sequence[str | os.PathLike[Any]] | None
        ) = None,
        *,
        imread: Callable[..., NDArray[Any]] = imread,  # type: ignore
        **kwargs: Any,
    ) -> None:
        # fall back to all files with .tif extension
        if files is None:
            files = '*.tif'
        super().__init__(imread, files, **kwargs)

    def __repr__(self) -> str:
        return f''
@final
class TiledSequence:
    """Tiled sequence of chunks.

    Transform a sequence of stacked chunks to tiled chunks.

    Parameters:
        stackshape:
            Shape of stacked sequence excluding chunks.
        chunkshape:
            Shape of chunks.
        axestiled:
            Axes to be tiled. Map stacked sequence axis
            to chunk axis. By default, the sequence is not tiled.
        axes:
            Character codes for dimensions in stackshape and chunkshape.

    Raises:
        ValueError:
            Length of `axes` does not match stackshape + chunkshape,
            or an axis is used more than once in `axestiled`.

    Examples:
        >>> ts = TiledSequence((1, 2), (3, 4), axestiled={1: 0}, axes='ABYX')
        >>> ts.shape
        (1, 6, 4)
        >>> ts.chunks
        (1, 3, 4)
        >>> ts.axes
        'AYX'

    """

    chunks: tuple[int, ...]
    """Shape of chunks in tiled sequence."""
    # with same number of dimensions as shape

    shape: tuple[int, ...]
    """Shape of tiled sequence including chunks."""

    axes: str | tuple[str, ...] | None
    """Dimensions codes of tiled sequence."""

    shape_squeezed: tuple[int, ...]
    """Shape of tiled sequence with length-1 dimensions removed."""

    axes_squeezed: str | tuple[str, ...] | None
    """Dimensions codes of tiled sequence with length-1 dimensions removed."""

    _stackdims: int
    """Number of dimensions in stack excluding chunks."""

    _chunkdims: int
    """Number of dimensions in chunks."""

    _shape_untiled: tuple[int, ...]
    """Shape of untiled sequence (stackshape + chunkshape)."""

    _axestiled: tuple[tuple[int, int], ...]
    """Map axes to tile from stack to chunks."""

    def __init__(
        self,
        stackshape: Sequence[int],
        chunkshape: Sequence[int],
        /,
        *,
        axestiled: dict[int, int] | Sequence[tuple[int, int]] | None = None,
        axes: str | Sequence[str] | None = None,
    ) -> None:
        self._stackdims = len(stackshape)
        self._chunkdims = len(chunkshape)
        self._shape_untiled = tuple(stackshape) + tuple(chunkshape)
        if axes is not None and len(axes) != len(self._shape_untiled):
            raise ValueError(
                'axes length does not match stackshape + chunkshape'
            )

        if axestiled:
            # express chunk axes as positions in stackshape + chunkshape;
            # build a new mapping instead of rewriting one while iterating
            offset = self._stackdims
            tiled = {
                ax0: ax1 + offset for ax0, ax1 in dict(axestiled).items()
            }
            # largest stack axis first: deleting it below does not shift
            # the positions of the stack axes still to be deleted
            self._axestiled = tuple(sorted(tiled.items(), reverse=True))

            axes_list = [] if axes is None else list(axes)
            shape = list(self._shape_untiled)
            chunks = [1] * self._stackdims + list(chunkshape)
            used = set()
            for ax0, ax1 in self._axestiled:
                if ax0 in used or ax1 in used:
                    raise ValueError('duplicate axis')
                used.add(ax0)
                used.add(ax1)
                # the chunk axis grows by the length of the stack axis
                shape[ax1] *= stackshape[ax0]
            for ax0, ax1 in self._axestiled:
                # the tiled stack axis is merged into the chunk axis
                del shape[ax0]
                del chunks[ax0]
                if axes_list:
                    del axes_list[ax0]
            self.shape = tuple(shape)
            self.chunks = tuple(chunks)
            if axes is None:
                self.axes = None
            elif isinstance(axes, str):
                self.axes = ''.join(axes_list)
            else:
                self.axes = tuple(axes_list)
        else:
            self._axestiled = ()
            self.shape = self._shape_untiled
            self.chunks = (1,) * self._stackdims + tuple(chunkshape)
            if axes is None:
                self.axes = None
            elif isinstance(axes, str):
                self.axes = axes
            else:
                self.axes = tuple(axes)

        assert len(self.shape) == len(self.chunks)
        if self.axes is not None:
            assert len(self.shape) == len(self.axes)

        if self.axes is None:
            self.shape_squeezed = tuple(i for i in self.shape if i > 1)
            self.axes_squeezed = None
        else:
            # these dimensions are kept even if their length is 1
            keep = ('X', 'Y', 'width', 'length', 'height')
            kept = [
                (i, ax)
                for i, ax in zip(self.shape, self.axes)
                if i > 1 or ax in keep
            ]
            self.shape_squeezed = tuple(i for i, _ in kept)
            squeezed = tuple(ax for _, ax in kept)
            self.axes_squeezed = (
                ''.join(squeezed) if isinstance(self.axes, str) else squeezed
            )

    def indices(
        self, indices: Iterable[Sequence[int]], /
    ) -> Iterator[tuple[int, ...]]:
        """Return iterator over chunk indices of tiled sequence.

        Parameters:
            indices:
                Indices of chunks in stacked sequence.
                Items that are *None* are yielded unchanged.

        Raises:
            ValueError:
                Length of an index does not match the stack dimensions.

        """
        chunkindex = [0] * self._chunkdims
        for index in indices:
            if index is None:
                yield None
            else:
                if len(index) != self._stackdims:
                    raise ValueError(f'{len(index)} != {self._stackdims}')
                index = list(index) + chunkindex
                for ax0, ax1 in self._axestiled:
                    index[ax1] = index[ax0]
                for ax0, ax1 in self._axestiled:
                    del index[ax0]
                yield tuple(index)

    def slices(
        self, indices: Iterable[Sequence[int]] | None = None, /
    ) -> Iterator[tuple[int | slice, ...]]:
        """Return iterator over slices of chunks in tiled sequence.

        Parameters:
            indices:
                Indices of chunks in stacked sequence.
                By default, all indices of the stacked sequence.
                Items that are *None* are yielded unchanged.

        Raises:
            ValueError:
                Length of an index does not match the stack dimensions.

        """
        wholeslice: list[int | slice]
        chunkslice: list[int | slice] = [slice(None)] * self._chunkdims

        if indices is None:
            indices = numpy.ndindex(self._shape_untiled[: self._stackdims])

        for index in indices:
            if index is None:
                yield None
            else:
                if len(index) != self._stackdims:
                    # same check and error as in indices(); an assert
                    # would be skipped under ``python -O``
                    raise ValueError(f'{len(index)} != {self._stackdims}')
                wholeslice = [*index, *chunkslice]
                for ax0, ax1 in self._axestiled:
                    # position of the chunk along the tiled axis
                    j = self._shape_untiled[ax1]
                    i = cast(int, wholeslice[ax0]) * j
                    wholeslice[ax1] = slice(i, i + j)
                for ax0, ax1 in self._axestiled:
                    del wholeslice[ax0]
                yield tuple(wholeslice)

    @property
    def ndim(self) -> int:
        """Number of dimensions of tiled sequence, that is, of `shape`."""
        return len(self.shape)

    @property
    def is_tiled(self) -> bool:
        """Sequence is tiled."""
        return bool(self._axestiled)
@final
class FileHandle:
    """Binary file handle.

    A limited, special purpose binary file handle that can:

    - handle embedded files (for example, LSM within LSM files).
    - re-open closed files (for multi-file formats, such as OME-TIFF).
    - read and write NumPy arrays and records from file-like objects.

    When initialized from another file handle, do not use the other handle
    unless this FileHandle is closed.

    FileHandle instances are not thread-safe.

    Parameters:
        file:
            File name or seekable binary stream, such as open file,
            BytesIO, or fsspec OpenFile.
        mode:
            File open mode if `file` is file name.
            The default is 'rb'. Files are always opened in binary mode.
        name:
            Name of file if `file` is binary stream.
        offset:
            Start position of embedded file.
            The default is the current file position.
        size:
            Size of embedded file.
            The default is the number of bytes from `offset` to
            the end of the file.

    """

    # TODO: make FileHandle a subclass of BinaryIO

    __slots__ = (
        '_fh',
        '_file',
        '_mode',
        '_name',
        '_dir',
        '_lock',
        '_offset',
        '_size',
        '_close',
    )

    _file: str | os.PathLike[Any] | FileHandle | BinaryIO | None
    _fh: BinaryIO | None
    _mode: str
    _name: str
    _dir: str
    _offset: int
    _size: int
    _close: bool
    _lock: threading.RLock | NullContext

    def __init__(
        self,
        file: str | os.PathLike[Any] | FileHandle | BinaryIO,
        /,
        mode: (
            Literal['r', 'r+', 'w', 'x', 'rb', 'r+b', 'wb', 'xb'] | None
        ) = None,
        *,
        name: str | None = None,
        offset: int | None = None,
        size: int | None = None,
    ) -> None:
        self._mode = 'rb' if mode is None else mode
        self._fh = None
        self._file = file  # reference to original argument for re-opening
        self._name = name if name else ''
        self._dir = ''
        # negative values mean "determine when opening"
        self._offset = -1 if offset is None else offset
        self._size = -1 if size is None else size
        self._close = True
        self._lock = NullContext()
        self.open()
        assert self._fh is not None

    def open(self) -> None:
        """Open or re-open file.

        Raises:
            ValueError:
                The file open mode is invalid, the stream is not seekable,
                or `file` is of an unsupported type.

        """
        if self._fh is not None:
            return  # file is open

        if isinstance(self._file, os.PathLike):
            self._file = os.fspath(self._file)

        if isinstance(self._file, str):
            # file name
            if self._mode[-1:] != 'b':
                self._mode += 'b'  # type: ignore
            if self._mode not in {'rb', 'r+b', 'wb', 'xb'}:
                raise ValueError(f'invalid mode {self._mode}')
            self._file = os.path.realpath(self._file)
            self._dir, self._name = os.path.split(self._file)
            self._fh = open(
                self._file, self._mode, encoding=None
            )  # type: ignore
            self._close = True
            self._offset = max(0, self._offset)
        elif isinstance(self._file, FileHandle):
            # FileHandle: share the underlying stream, do not own it
            self._fh = self._file._fh
            self._offset = max(0, self._offset)
            self._offset += self._file._offset
            self._close = False
            if not self._name:
                if self._offset:
                    name, ext = os.path.splitext(self._file._name)
                    self._name = f'{name}@{self._offset}{ext}'
                else:
                    self._name = self._file._name
            self._mode = self._file._mode
            self._dir = self._file._dir
        elif hasattr(self._file, 'seek'):
            # binary stream: open file, BytesIO, fsspec LocalFileOpener
            # cast to BinaryIO even it might not be
            self._fh = cast(BinaryIO, self._file)
            try:
                self._fh.tell()
            except Exception as exc:
                raise ValueError('binary stream is not seekable') from exc
            if self._offset < 0:
                self._offset = self._fh.tell()
            self._close = False
            if not self._name:
                try:
                    self._dir, self._name = os.path.split(self._fh.name)
                except AttributeError:
                    try:
                        self._dir, self._name = os.path.split(
                            self._fh.path  # type: ignore
                        )
                    except AttributeError:
                        self._name = 'Unnamed binary stream'
            try:
                self._mode = self._fh.mode
            except AttributeError:
                pass
        elif hasattr(self._file, 'open'):
            # fsspec OpenFile
            _file: Any = self._file
            self._fh = cast(BinaryIO, _file.open())
            try:
                self._fh.tell()
            except Exception as exc:
                try:
                    self._fh.close()
                except Exception:
                    pass
                raise ValueError('OpenFile is not seekable') from exc
            if self._offset < 0:
                self._offset = self._fh.tell()
            self._close = True
            if not self._name:
                try:
                    self._dir, self._name = os.path.split(_file.path)
                except AttributeError:
                    self._name = 'Unnamed binary stream'
            try:
                self._mode = _file.mode
            except AttributeError:
                pass
        else:
            raise ValueError(
                'the first parameter must be a file name '
                'or seekable binary file object, '
                f'not {type(self._file)!r}'
            )

        assert self._fh is not None

        if self._offset:
            self._fh.seek(self._offset)

        if self._size < 0:
            # default size is the number of bytes from offset to end of file.
            # Seeking to (offset, SEEK_END) would yield end + offset instead.
            pos = self._fh.tell()
            self._fh.seek(0, os.SEEK_END)
            self._size = max(0, self._fh.tell() - self._offset)
            self._fh.seek(pos)

    def close(self) -> None:
        """Close file handle if it is owned by this instance."""
        if self._close and self._fh is not None:
            try:
                self._fh.close()
            except Exception:
                # PermissionError on MacOS. See issue #184
                pass
            self._fh = None

    def fileno(self) -> int:
        """Return underlying file descriptor if exists, else raise OSError."""
        assert self._fh is not None
        try:
            return self._fh.fileno()
        except (OSError, AttributeError) as exc:
            raise OSError(
                f'{type(self._fh)} does not have a file descriptor'
            ) from exc

    def writable(self) -> bool:
        """Return True if stream supports writing."""
        assert self._fh is not None
        if hasattr(self._fh, 'writable'):
            return self._fh.writable()
        return False

    def seekable(self) -> bool:
        """Return True if stream supports random access."""
        return True

    def tell(self) -> int:
        """Return file's current position relative to start of file."""
        assert self._fh is not None
        return self._fh.tell() - self._offset

    def seek(self, offset: int, /, whence: int = 0) -> int:
        """Set file's current position.

        Parameters:
            offset:
                Position of file handle relative to position indicated
                by `whence`.
            whence:
                Relative position of `offset`.
                0 (`os.SEEK_SET`) beginning of file (default).
                1 (`os.SEEK_CUR`) current position.
                2 (`os.SEEK_END`) end of file.

        Returns:
            New position relative to start of (embedded) file,
            consistent with :py:meth:`tell`.

        """
        assert self._fh is not None
        if self._offset:
            if whence == 0:
                return (
                    self._fh.seek(self._offset + offset, whence) - self._offset
                )
            if whence == 2 and self._size > 0:
                return (
                    self._fh.seek(self._offset + self._size + offset, 0)
                    - self._offset
                )
        # positions returned by the stream are absolute; make them relative
        return self._fh.seek(offset, whence) - self._offset

    def read(self, size: int = -1, /) -> bytes:
        """Return bytes read from file.

        Parameters:
            size:
                Number of bytes to read from file.
                By default, read until the end of the file.

        """
        if size < 0 and self._offset:
            size = self._size
        assert self._fh is not None
        return self._fh.read(size)

    def readinto(self, buffer, /) -> int:
        """Read bytes from file into buffer.

        Parameters:
            buffer: Buffer to read into.

        Returns:
            Number of bytes read from file.

        """
        assert self._fh is not None
        return self._fh.readinto(buffer)  # type: ignore

    def write(self, buffer: bytes, /) -> int:
        """Write bytes to file and return number of bytes written.

        Parameters:
            buffer: Bytes to write to file.

        Returns:
            Number of bytes written.

        """
        assert self._fh is not None
        return self._fh.write(buffer)

    def flush(self) -> None:
        """Flush write buffers of stream if applicable."""
        assert self._fh is not None
        if hasattr(self._fh, 'flush'):
            self._fh.flush()

    def memmap_array(
        self,
        dtype: DTypeLike,
        shape: tuple[int, ...],
        offset: int = 0,
        *,
        mode: str = 'r',
        order: str = 'C',
    ) -> NDArray[Any]:
        """Return `numpy.memmap` of array data stored in file.

        Parameters:
            dtype:
                Data type of array in file.
            shape:
                Shape of array in file.
            offset:
                Start position of array-data in file.
            mode:
                File is opened in this mode. The default is read-only.
            order:
                Order of ndarray memory layout. The default is 'C'.

        Raises:
            ValueError:
                The file has no file descriptor.

        """
        if not self.is_file:
            raise ValueError('cannot memory-map file without fileno')
        assert self._fh is not None
        return numpy.memmap(
            self._fh,  # type: ignore
            dtype=dtype,
            mode=mode,
            offset=self._offset + offset,
            shape=shape,
            order=order,
        )

    def read_array(
        self,
        dtype: DTypeLike,
        count: int = -1,
        offset: int = 0,
        *,
        out: NDArray[Any] | None = None,
    ) -> NDArray[Any]:
        """Return NumPy array from file in native byte order.

        Parameters:
            dtype:
                Data type of array to read.
            count:
                Number of items to read. By default, all items are read.
            offset:
                Start position of array-data in file.
                If zero, read from the current position.
            out:
                NumPy array to read into. By default, a new array is created.

        Raises:
            ValueError:
                Size of `out` does not match the requested number of bytes,
                or fewer bytes than requested could be read.

        """
        dtype = numpy.dtype(dtype)

        if count < 0:
            nbytes = self._size if out is None else out.nbytes
            count = nbytes // dtype.itemsize
        else:
            nbytes = count * dtype.itemsize

        result = numpy.empty(count, dtype) if out is None else out

        if result.nbytes != nbytes:
            raise ValueError('size mismatch')

        assert self._fh is not None

        if offset:
            self._fh.seek(self._offset + offset)

        try:
            n = self._fh.readinto(result)  # type: ignore
        except AttributeError:
            # stream without readinto: copy from bytes
            result[:] = numpy.frombuffer(self._fh.read(nbytes), dtype).reshape(
                result.shape
            )
            n = nbytes

        if n != nbytes:
            raise ValueError(f'failed to read {nbytes} bytes, got {n}')

        # convert to native byte order in-place
        if not result.dtype.isnative:
            if not dtype.isnative:
                result.byteswap(True)
            result = result.view(result.dtype.newbyteorder())
        elif result.dtype.isnative != dtype.isnative:
            result.byteswap(True)

        if out is not None:
            if hasattr(out, 'flush'):
                out.flush()

        return result

    def read_record(
        self,
        dtype: DTypeLike,
        shape: tuple[int, ...] | int | None = 1,
        *,
        byteorder: Literal['S', '<', '>', '=', '|'] | None = None,
    ) -> numpy.recarray:
        """Return NumPy record from file.

        Parameters:
            dtype:
                Data type of record array to read.
            shape:
                Shape of record array to read.
            byteorder:
                Byte order of record array to read.

        """
        assert self._fh is not None

        dtype = numpy.dtype(dtype)
        if byteorder is not None:
            dtype = dtype.newbyteorder(byteorder)

        try:
            record = numpy.rec.fromfile(self._fh, dtype, shape)  # type: ignore
        except Exception:
            # fall back to reading bytes from streams without file descriptor
            if shape is None:
                shape = self._size // dtype.itemsize
            size = product(sequence(shape)) * dtype.itemsize
            # TODO: record is not writable
            data = self._fh.read(size)
            record = numpy.rec.fromstring(
                data,
                dtype,
                shape,
            )
        return record[0] if shape == 1 else record

    def write_empty(self, size: int, /) -> int:
        """Append null-bytes to file.

        The file position must be at the end of the file.

        Parameters:
            size: Number of null-bytes to write to file.

        Returns:
            Number of bytes appended.

        """
        if size < 1:
            return 0
        assert self._fh is not None
        # seek past the end and write the last byte only
        self._fh.seek(size - 1, os.SEEK_CUR)
        self._fh.write(b'\x00')
        return size

    def write_array(
        self,
        data: NDArray[Any],
        dtype: DTypeLike = None,
        /,
    ) -> int:
        """Write NumPy array to file in C contiguous order.

        Parameters:
            data: Array to write to file.
            dtype: Data type to convert array to before writing.

        Returns:
            Number of bytes written.

        """
        assert self._fh is not None
        pos = self._fh.tell()
        # writing non-contiguous arrays is very slow
        data = numpy.ascontiguousarray(data, dtype)
        try:
            data.tofile(self._fh)
        except io.UnsupportedOperation:
            # numpy cannot write to BytesIO
            self._fh.write(data.tobytes())
        return self._fh.tell() - pos

    def read_segments(
        self,
        offsets: Sequence[int],
        bytecounts: Sequence[int],
        /,
        indices: Sequence[int] | None = None,
        *,
        sort: bool = True,
        lock: threading.RLock | NullContext | None = None,
        buffersize: int | None = None,
        flat: bool = True,
    ) -> (
        Iterator[tuple[bytes | None, int]]
        | Iterator[list[tuple[bytes | None, int]]]
    ):
        """Return iterator over segments read from file and their indices.

        The purpose of this function is to

        - reduce small or random reads.
        - reduce acquiring reentrant locks.
        - synchronize seeks and reads.
        - limit size of segments read into memory at once.
          (ThreadPoolExecutor.map is not collecting iterables lazily).

        Parameters:
            offsets:
                Offsets of segments to read from file.
            bytecounts:
                Byte counts of segments to read from file.
            indices:
                Indices of segments in image.
                The default is `range(len(offsets))`.
            sort:
                Read segments from file in order of their offsets.
            lock:
                Reentrant lock to synchronize seeks and reads.
            buffersize:
                Approximate number of bytes to read from file in one pass.
                The default is :py:attr:`_TIFF.BUFFERSIZE`.
            flat:
                If *True*, return iterator over individual (segment, index)
                tuples.
                Else, return an iterator over a list of (segment, index)
                tuples that were acquired in one pass.

        Yields:
            Individual or lists of `(segment, index)` tuples.
            The segment is None if its offset or byte count is not positive.

        """
        # TODO: Cythonize this?
        assert self._fh is not None
        length = len(offsets)
        if length < 1:
            return
        if length == 1:
            # `indices` labels the segment; `offsets` and `bytecounts`
            # are always addressed by position
            index = 0 if indices is None else indices[0]
            offset = offsets[0]
            bytecount = bytecounts[0]
            if bytecount > 0 and offset > 0:
                if lock is None:
                    lock = self._lock
                with lock:
                    self.seek(offset)
                    data = self._fh.read(bytecount)
            else:
                data = None
            yield (data, index) if flat else [(data, index)]
            return

        if lock is None:
            lock = self._lock
        if buffersize is None:
            buffersize = TIFF.BUFFERSIZE

        if indices is None:
            segments = [(i, offsets[i], bytecounts[i]) for i in range(length)]
        else:
            segments = [
                (indices[i], offsets[i], bytecounts[i]) for i in range(length)
            ]
        if sort:
            segments = sorted(segments, key=lambda x: x[1])

        # segments are contiguous if each one ends where the next begins;
        # missing segments (zero offset or byte count) are ignored
        iscontig = True
        for i in range(length - 1):
            _, offset, bytecount = segments[i]
            nextoffset = segments[i + 1][1]
            if offset == 0 or bytecount == 0 or nextoffset == 0:
                continue
            if offset + bytecount != nextoffset:
                iscontig = False
                break

        seek = self.seek
        read = self._fh.read
        result: list[tuple[bytes | None, int]]

        if iscontig:
            # consolidate reads
            i = 0
            while i < length:
                j = i
                offset = -1
                bytecount = 0
                while bytecount <= buffersize and i < length:
                    _, o, b = segments[i]
                    if o > 0 and b > 0:
                        if offset < 0:
                            offset = o
                        bytecount += b
                    i += 1

                if offset < 0:
                    data = None
                else:
                    with lock:
                        seek(offset)
                        data = read(bytecount)
                # split the consolidated buffer into individual segments
                start = 0
                stop = 0
                result = []
                while j < i:
                    index, offset, bytecount = segments[j]
                    if offset > 0 and bytecount > 0:
                        stop += bytecount
                        result.append(
                            (data[start:stop], index)  # type: ignore
                        )
                        start = stop
                    else:
                        result.append((None, index))
                    j += 1
                if flat:
                    yield from result
                else:
                    yield result
            return

        i = 0
        while i < length:
            result = []
            size = 0
            with lock:
                while size <= buffersize and i < length:
                    index, offset, bytecount = segments[i]
                    if offset > 0 and bytecount > 0:
                        seek(offset)
                        result.append((read(bytecount), index))
                        size += bytecount
                    else:
                        result.append((None, index))
                    i += 1
            if flat:
                yield from result
            else:
                yield result

    def __enter__(self) -> FileHandle:
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
        self.close()
        self._file = None

    # TODO: this may crash the Python interpreter under certain conditions
    # def __getattr__(self, name: str, /) -> Any:
    #     """Return attribute from underlying file object."""
    #     if self._offset:
    #         warnings.warn(
    #             f'{name} not implemented for embedded files',
    #             UserWarning,
    #         )
    #     return getattr(self._fh, name)

    def __repr__(self) -> str:
        return f'<tifffile.FileHandle {self._name!r}>'

    def __str__(self) -> str:
        return '\n '.join(
            (
                'FileHandle',
                self._name,
                self._dir,
                f'{self._size} bytes',
                'closed' if self._fh is None else 'open',
            )
        )

    @property
    def name(self) -> str:
        """Name of file or stream."""
        return self._name

    @property
    def dirname(self) -> str:
        """Directory in which file is stored."""
        return self._dir

    @property
    def path(self) -> str:
        """Absolute path of file."""
        return os.path.join(self._dir, self._name)

    @property
    def extension(self) -> str:
        """File name extension of file or stream."""
        name, ext = os.path.splitext(self._name.lower())
        if ext and name.endswith('.ome'):
            ext = '.ome' + ext
        return ext

    @property
    def size(self) -> int:
        """Size of file in bytes."""
        return self._size

    @property
    def closed(self) -> bool:
        """File is closed."""
        return self._fh is None

    @property
    def lock(self) -> threading.RLock | NullContext:
        """Reentrant lock to synchronize reads and writes."""
        return self._lock

    @lock.setter
    def lock(self, value: bool, /) -> None:
        self.set_lock(value)

    def set_lock(self, value: bool, /) -> None:
        """Enable or disable reentrant lock. Keep existing lock if enabled."""
        if bool(value) == isinstance(self._lock, NullContext):
            self._lock = threading.RLock() if value else NullContext()

    @property
    def has_lock(self) -> bool:
        """A reentrant lock is currently used to sync reads and writes."""
        return not isinstance(self._lock, NullContext)

    @property
    def is_file(self) -> bool:
        """File has fileno and can be memory-mapped."""
        try:
            self._fh.fileno()  # type: ignore
            return True
        except Exception:
            return False
@final
class FileCache:
    """Keep FileHandles open.

    Parameters:
        size: Maximum number of files to keep open. The default is 8.
        lock: Reentrant lock to synchronize reads and writes.

    """

    __slots__ = ('files', 'keep', 'past', 'lock', 'size')

    size: int
    """Maximum number of files to keep open."""

    files: dict[FileHandle, int]
    """Reference counts of opened files."""

    keep: set[FileHandle]
    """Set of files to keep open."""

    past: list[FileHandle]
    """FIFO list of opened files."""

    lock: threading.RLock | NullContext
    """Reentrant lock to synchronize reads and writes."""

    def __init__(
        self,
        size: int | None = None,
        *,
        lock: threading.RLock | NullContext | None = None,
    ) -> None:
        self.past = []
        self.files = {}
        self.keep = set()
        self.size = 8 if size is None else int(size)
        self.lock = NullContext() if lock is None else lock

    def open(self, fh: FileHandle, /) -> None:
        """Open file, re-open if necessary."""
        with self.lock:
            if fh in self.files:
                self.files[fh] += 1
            elif fh.closed:
                fh.open()
                self.files[fh] = 1
                self.past.append(fh)
            else:
                # file was already open when first seen: never close it
                self.files[fh] = 2
                self.keep.add(fh)
                self.past.append(fh)

    def close(self, fh: FileHandle, /) -> None:
        """Close least recently used open files."""
        with self.lock:
            if fh in self.files:
                self.files[fh] -= 1
            self._trim()

    def clear(self) -> None:
        """Close all opened files if not in use when opened first."""
        with self.lock:
            # iterate over a copy because entries are removed in the loop
            for fh in list(self.files):
                if fh not in self.keep:
                    fh.close()
                    del self.files[fh]
                    self.past.remove(fh)

    def read(
        self,
        fh: FileHandle,
        /,
        offset: int,
        bytecount: int,
        whence: int = 0,
    ) -> bytes:
        """Return bytes read from binary file.

        Parameters:
            fh:
                File handle to read from.
            offset:
                Position in file to start reading from relative to the
                position indicated by `whence`.
            bytecount:
                Number of bytes to read.
            whence:
                Relative position of offset.
                0 (`os.SEEK_SET`) beginning of file (default).
                1 (`os.SEEK_CUR`) current position.
                2 (`os.SEEK_END`) end of file.

        """
        # this function is more efficient than
        # filecache.open(fh)
        # with lock:
        #     fh.seek()
        #     data = fh.read()
        # filecache.close(fh)
        with self.lock:
            added = self._register(fh)
            fh.seek(offset, whence)
            data = fh.read(bytecount)
            if added:
                self._trim()
        return data

    def write(
        self,
        fh: FileHandle,
        /,
        offset: int,
        data: bytes,
        whence: int = 0,
    ) -> int:
        """Write bytes to binary file.

        Parameters:
            fh:
                File handle to write to.
            offset:
                Position in file to start writing from relative to the
                position indicated by `whence`.
            data:
                Bytes to write.
            whence:
                Relative position of offset.
                0 (`os.SEEK_SET`) beginning of file (default).
                1 (`os.SEEK_CUR`) current position.
                2 (`os.SEEK_END`) end of file.

        Returns:
            Number of bytes written.

        """
        with self.lock:
            added = self._register(fh)
            fh.seek(offset, whence)
            written = fh.write(data)
            if added:
                self._trim()
        return written

    def _register(self, fh: FileHandle, /) -> bool:
        """Track file for one-off access. Return False if already tracked.

        A closed file is opened and tracked with a zero reference count,
        such that it can be closed by the next trim. A file that is already
        open is never closed by the cache.

        """
        if fh in self.files:
            return False
        if fh.closed:
            fh.open()
            self.files[fh] = 0
        else:
            self.files[fh] = 1
            self.keep.add(fh)
        self.past.append(fh)
        return True

    def _trim(self) -> None:
        """Trim file cache.

        Close unreferenced files, oldest first, while the number of tracked
        files exceeds the cache size.

        """
        index = 0
        size = len(self.past)
        while index < size > self.size:
            fh = self.past[index]
            if fh not in self.keep and self.files[fh] <= 0:
                fh.close()
                del self.files[fh]
                del self.past[index]
                size -= 1
            else:
                index += 1

    def __len__(self) -> int:
        """Return number of open files."""
        return len(self.files)

    def __repr__(self) -> str:
        return f'<tifffile.FileCache @0x{id(self):016X}>'
@final
class StoredShape(collections.abc.Sequence):
    """Normalized shape of image array in TIFF pages.

    Instances behave like the 6-tuple returned by :py:attr:`shape`.
    The number of extra samples is not part of the sequence and is not
    considered when comparing instances.

    Parameters:
        frames:
            Number of TIFF pages.
        separate_samples:
            Number of separate samples.
        depth:
            Image depth.
        length:
            Image length (height).
        width:
            Image width.
        contig_samples:
            Number of contiguous samples.
        extrasamples:
            Number of extra samples.

    Raises:
        ValueError:
            Both `separate_samples` and `contig_samples` differ from 1.

    """

    __slots__ = (
        'frames',
        'separate_samples',
        'depth',
        'length',
        'width',
        'contig_samples',
        'extrasamples',
    )

    frames: int
    """Number of TIFF pages."""

    separate_samples: int
    """Number of separate samples."""

    depth: int
    """Image depth. Value of ImageDepth tag or 1."""

    length: int
    """Image length (height). Value of ImageLength tag."""

    width: int
    """Image width. Value of ImageWidth tag."""

    contig_samples: int
    """Number of contiguous samples."""

    extrasamples: int
    """Number of extra samples. Count of ExtraSamples tag or 0."""

    def __init__(
        self,
        frames: int = 1,
        separate_samples: int = 1,
        depth: int = 1,
        length: int = 1,
        width: int = 1,
        contig_samples: int = 1,
        extrasamples: int = 0,
    ) -> None:
        # samples are stored either separate (planar) or contiguous
        if separate_samples != 1 and contig_samples != 1:
            raise ValueError('invalid samples')

        self.frames = int(frames)
        self.separate_samples = int(separate_samples)
        self.depth = int(depth)
        self.length = int(length)
        self.width = int(width)
        self.contig_samples = int(contig_samples)
        self.extrasamples = int(extrasamples)

    @property
    def size(self) -> int:
        """Product of all dimensions."""
        # abs() is applied to the frame count only
        return abs(self.frames) * self.page_size

    @property
    def samples(self) -> int:
        """Number of samples. Count of SamplesPerPixel tag."""
        assert self.separate_samples == 1 or self.contig_samples == 1
        samples = (
            self.separate_samples
            if self.separate_samples > 1
            else self.contig_samples
        )
        assert self.extrasamples < samples
        return samples

    @property
    def photometric_samples(self) -> int:
        """Number of photometric samples."""
        return self.samples - self.extrasamples

    @property
    def shape(self) -> tuple[int, int, int, int, int, int]:
        """Normalized 6D shape of image array in all pages."""
        return (
            self.frames,
            self.separate_samples,
            self.depth,
            self.length,
            self.width,
            self.contig_samples,
        )

    @property
    def page_shape(self) -> tuple[int, int, int, int, int]:
        """Normalized 5D shape of image array in single page."""
        return (
            self.separate_samples,
            self.depth,
            self.length,
            self.width,
            self.contig_samples,
        )

    @property
    def page_size(self) -> int:
        """Product of dimensions in single page."""
        return (
            self.separate_samples
            * self.depth
            * self.length
            * self.width
            * self.contig_samples
        )

    @property
    def squeezed(self) -> tuple[int, ...]:
        """Shape with length-1 removed, except for length and width."""
        # note: the depth dimension is not included
        shape = [self.length, self.width]
        if self.separate_samples > 1:
            shape.insert(0, self.separate_samples)
        elif self.contig_samples > 1:
            shape.append(self.contig_samples)
        if self.frames > 1:
            shape.insert(0, self.frames)
        return tuple(shape)

    @property
    def is_valid(self) -> bool:
        """Shape is valid."""
        return (
            self.frames >= 1
            and self.depth >= 1
            and self.length >= 1
            and self.width >= 1
            and (self.separate_samples == 1 or self.contig_samples == 1)
            and (
                self.contig_samples
                if self.contig_samples > 1
                else self.separate_samples
            )
            > self.extrasamples
        )

    @property
    def is_planar(self) -> bool:
        """Shape contains planar samples."""
        return self.separate_samples > 1

    @property
    def planarconfig(self) -> int | None:
        """Value of PlanarConfiguration tag."""
        if self.separate_samples > 1:
            return 2  # PLANARCONFIG.SEPARATE
        if self.contig_samples > 1:
            return 1  # PLANARCONFIG.CONTIG
        return None

    def __len__(self) -> int:
        return 6

    @overload
    def __getitem__(self, key: int, /) -> int: ...

    @overload
    def __getitem__(self, key: slice, /) -> tuple[int, ...]: ...

    def __getitem__(self, key: int | slice, /) -> int | tuple[int, ...]:
        return self.shape[key]

    def __eq__(self, other: object, /) -> bool:
        return isinstance(other, StoredShape) and self.shape == other.shape

    def __repr__(self) -> str:
        return (
            '<StoredShape('
            f'frames={self.frames}, '
            f'separate_samples={self.separate_samples}, '
            f'depth={self.depth}, '
            f'length={self.length}, '
            f'width={self.width}, '
            f'contig_samples={self.contig_samples}, '
            f'extrasamples={self.extrasamples}'
            ')>'
        )
@final
class NullContext:
    """Context manager that does nothing.

    Instances can stand in for a reentrant lock where no synchronization
    is needed. Exceptions raised in the managed block are not suppressed.

    >>> with NullContext():
    ...     pass
    ...

    """

    __slots__ = ()

    def __enter__(self) -> NullContext:
        # nothing to acquire
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
        # nothing to release; returning None lets exceptions propagate
        return None

    def __repr__(self) -> str:
        return 'NullContext()'
@final
class Timer:
    """Stopwatch for timing execution speed.

    When used as a context manager, the elapsed time is printed on exit.

    Parameters:
        message:
            Message to print.
        end:
            End of print statement.
        started:
            Value of performance counter when started.
            The default is the current performance counter.

    Examples:
        >>> with Timer('sleep:'):
        ...     time.sleep(1.05)
        sleep: 1.0... s

    """

    __slots__ = ('started', 'stopped', 'duration')

    started: float
    """Value of performance counter when started."""

    stopped: float
    """Value of performance counter when stopped."""

    duration: float
    """Duration between `started` and `stopped` in seconds."""

    def __init__(
        self,
        message: str | None = None,
        *,
        end: str = ' ',
        started: float | None = None,
    ) -> None:
        if message is not None:
            print(message, end=end, flush=True)
        self.duration = 0.0
        # read the counter only after the message has been printed
        now = time.perf_counter() if started is None else started
        self.started = now
        self.stopped = now

    def start(self, message: str | None = None, *, end: str = ' ') -> float:
        """Reset and start timer. Return value of performance counter.

        Parameters:
            message: Message to print.
            end: End of print statement.

        """
        if message is not None:
            print(message, end=end, flush=True)
        self.duration = 0.0
        now = time.perf_counter()
        self.started = now
        self.stopped = now
        return now

    def stop(self, message: str | None = None, *, end: str = ' ') -> float:
        """Stop timer and return seconds elapsed since start.

        Parameters:
            message: Message to print.
            end: End of print statement.

        """
        # read the counter before printing
        self.stopped = time.perf_counter()
        if message is not None:
            print(message, end=end, flush=True)
        elapsed = self.stopped - self.started
        self.duration = elapsed
        return elapsed

    def print(
        self, message: str | None = None, *, end: str | None = None
    ) -> None:
        """Print duration from timer start till last stop or now.

        Parameters:
            message: Message to print.
            end: End of print statement.

        """
        # format the duration before anything is printed
        text = str(self)
        if message is not None:
            print(message, end=' ')
        print(text, end=end, flush=True)

    @staticmethod
    def clock() -> float:
        """Return value of performance counter."""
        return time.perf_counter()

    def __str__(self) -> str:
        """Return duration from timer start till last stop or now."""
        elapsed = self.duration
        if elapsed <= 0.0:
            # not stopped
            elapsed = time.perf_counter() - self.started
        text = str(datetime.timedelta(seconds=elapsed))
        # skip leading zero hours/minutes/tens, e.g. '0:00:0'
        pos = 0
        size = len(text)
        while pos < size and text[pos : pos + 2] in '0:0010203040506070809':
            pos += 1
        if text[pos : pos + 1] == ':':
            pos += 1
        return f'{text[pos:]} s'

    def __repr__(self) -> str:
        return f'Timer(started={self.started})'

    def __enter__(self) -> Timer:
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
        self.print()
>>> OmeXml.validate(xml) True """ images: list[str] """OME-XML Image elements.""" annotations: list[str] """OME-XML Annotation elements.""" elements: list[str] """Other OME-XML elements.""" _xml: str _ifd: int def __init__(self, **metadata: Any) -> None: metadata = metadata.get('OME', metadata) self._ifd = 0 self.images = [] self.annotations = [] self.elements = [] # TODO: parse other OME elements from metadata # Project # Dataset # Folder # Experiment # Plate # Screen # Experimenter # ExperimenterGroup # Instrument # StructuredAnnotations # ROI if 'UUID' in metadata: uuid = metadata['UUID'].split(':')[-1] else: from uuid import uuid1 uuid = str(uuid1()) creator = OmeXml._attribute( metadata, 'Creator', default=f'tifffile.py {__version__}' ) schema = 'http://www.openmicroscopy.org/Schemas/OME/2016-06' self._xml = ( '{declaration}' f'' '{images}' '{annotations}' '{elements}' '' ) def addimage( self, dtype: DTypeLike, shape: Sequence[int], storedshape: tuple[int, int, int, int, int, int], *, axes: str | None = None, **metadata: Any, ) -> None: """Add image to OME-XML. The OME model can handle up to 9 dimensional images for selected axes orders. Refer to the OME-XML specification for details. Non-TZCYXS (modulo) dimensions must be after a TZC dimension or require an unused TZC dimension. Parameters: dtype: Data type of image array. shape: Shape of image array. storedshape: Normalized shape describing how image array is stored in TIFF file as (pages, separate_samples, depth, length, width, contig_samples). axes: Character codes for dimensions in `shape`. By default, `axes` is determined from the DimensionOrder metadata attribute or matched to the `shape` in reverse order of TZC(S)YX(S) based on `storedshape`. The following codes are supported: 'S' sample, 'X' width, 'Y' length, 'Z' depth, 'C' channel, 'T' time, 'A' angle, 'P' phase, 'R' tile, 'H' lifetime, 'E' lambda, 'Q' other. **metadata: Additional OME-XML attributes or elements to be stored. 
Image/Pixels: Name, AcquisitionDate, Description, DimensionOrder, PhysicalSizeX, PhysicalSizeXUnit, PhysicalSizeY, PhysicalSizeYUnit, PhysicalSizeZ, PhysicalSizeZUnit, TimeIncrement, TimeIncrementUnit. Per Plane: DeltaT, DeltaTUnit, ExposureTime, ExposureTimeUnit, PositionX, PositionXUnit, PositionY, PositionYUnit, PositionZ, PositionZUnit. Per Channel: Name, AcquisitionMode, Color, ContrastMethod, EmissionWavelength, EmissionWavelengthUnit, ExcitationWavelength, ExcitationWavelengthUnit, Fluor, IlluminationType, NDFilter, PinholeSize, PinholeSizeUnit, PockelCellSetting. Raises: OmeXmlError: Image format not supported. """ index = len(self.images) # get Image and Pixels metadata metadata = metadata.get('OME', metadata) metadata = metadata.get('Image', metadata) if isinstance(metadata, (list, tuple)): # multiple images metadata = metadata[index] # type: ignore if 'Pixels' in metadata: # merge with Image import copy metadata = copy.deepcopy(metadata) if 'ID' in metadata['Pixels']: del metadata['Pixels']['ID'] metadata.update(metadata['Pixels']) del metadata['Pixels'] try: dtype = numpy.dtype(dtype).name dtype = { 'int8': 'int8', 'int16': 'int16', 'int32': 'int32', 'uint8': 'uint8', 'uint16': 'uint16', 'uint32': 'uint32', 'float32': 'float', 'float64': 'double', 'complex64': 'complex', 'complex128': 'double-complex', 'bool': 'bit', }[dtype] except KeyError as exc: raise OmeXmlError(f'data type {dtype!r} not supported') from exc if metadata.get('Type', dtype) != dtype: raise OmeXmlError( f'metadata Pixels Type {metadata["Type"]!r} ' f'does not match array dtype {dtype!r}' ) samples = 1 planecount, separate, depth, length, width, contig = storedshape if depth != 1: raise OmeXmlError('ImageDepth not supported') if not (separate == 1 or contig == 1): raise ValueError('invalid stored shape') shape = tuple(int(i) for i in shape) ndim = len(shape) if ndim < 1 or product(shape) <= 0: raise OmeXmlError('empty arrays not supported') if axes is None: # get axes from shape, 
stored shape, and DimensionOrder if contig != 1 or shape[-3:] == (length, width, 1): axes = 'YXS' samples = contig elif separate != 1 or ( ndim == 6 and shape[-3:] == (1, length, width) ): axes = 'SYX' samples = separate else: axes = 'YX' if not len(axes) <= ndim <= (6 if 'S' in axes else 5): raise OmeXmlError(f'{ndim} dimensions not supported') hiaxes: str = metadata.get('DimensionOrder', 'XYCZT')[:1:-1] axes = hiaxes[(6 if 'S' in axes else 5) - ndim :] + axes assert len(axes) == len(shape) else: # validate axes against shape and stored shape axes = axes.upper() if len(axes) != len(shape): raise ValueError('axes do not match shape') if not ( axes.endswith('YX') or axes.endswith('YXS') or (axes.endswith('YXC') and 'S' not in axes) ): raise OmeXmlError('dimensions must end with YX or YXS') unique = [] for ax in axes: if ax not in 'TZCYXSAPRHEQ': raise OmeXmlError(f'dimension {ax!r} not supported') if ax in unique: raise OmeXmlError(f'multiple {ax!r} dimensions') unique.append(ax) if ndim > (9 if 'S' in axes else 8): raise OmeXmlError('more than 8 dimensions not supported') if contig != 1: samples = contig if ndim < 3: raise ValueError('dimensions do not match stored shape') if axes[-1] == 'C': # allow C axis instead of S if 'S' in axes: raise ValueError('invalid axes') axes = axes.replace('C', 'S') elif axes[-1] != 'S': raise ValueError('axes do not match stored shape') if shape[-1] != contig or shape[-2] != width: raise ValueError('shape does not match stored shape') elif separate != 1: samples = separate if ndim < 3: raise ValueError('dimensions do not match stored shape') if axes[-3] == 'C': # allow C axis instead of S if 'S' in axes: raise ValueError('invalid axes') axes = axes.replace('C', 'S') elif axes[-3] != 'S': raise ValueError('axes do not match stored shape') if shape[-3] != separate or shape[-1] != width: raise ValueError('shape does not match stored shape') if shape[axes.index('X')] != width or shape[axes.index('Y')] != length: raise ValueError('shape 
does not match stored shape') if 'S' in axes: hiaxes = axes[: min(axes.index('S'), axes.index('Y'))] else: hiaxes = axes[: axes.index('Y')] if any(ax in 'APRHEQ' for ax in hiaxes): # modulo axes modulo = {} dimorder = [] axestype = { 'A': 'angle', 'P': 'phase', 'R': 'tile', 'H': 'lifetime', 'E': 'lambda', 'Q': 'other', } for i, ax in enumerate(hiaxes): if ax in 'APRHEQ': x = hiaxes[i - 1 : i] if x and x in 'TZC': # use previous axis modulo[x] = axestype[ax], shape[i] else: # use next unused axis for x in 'TZC': if x not in dimorder and x not in modulo: modulo[x] = axestype[ax], shape[i] dimorder.append(x) break else: # TODO: support any order of axes, such as, APRTZC raise OmeXmlError('more than 3 modulo dimensions') else: dimorder.append(ax) hiaxes = ''.join(dimorder) # TODO: use user-specified start, stop, step, or labels moduloalong = ''.join( f'' for ax, (axtype, size) in modulo.items() ) annotationref = f'' annotations = ( f'' '' '' f'{moduloalong}' '' '' '' ) self.annotations.append(annotations) else: modulo = {} annotationref = '' hiaxes = hiaxes[::-1] for dimorder in ( metadata.get('DimensionOrder', 'XYCZT'), 'XYCZT', 'XYZCT', 'XYZTC', 'XYCTZ', 'XYTCZ', 'XYTZC', ): if hiaxes in dimorder: break else: raise OmeXmlError(f'dimension order {axes!r} not supported') dimsizes = [] for ax in dimorder: if ax == 'S': continue if ax in axes: size = shape[axes.index(ax)] else: size = 1 if ax == 'C': sizec = size size *= samples if ax in modulo: size *= modulo[ax][1] dimsizes.append(size) sizes = ''.join( f' Size{ax}="{size}"' for ax, size in zip(dimorder, dimsizes) ) # verify DimensionOrder in metadata is compatible if 'DimensionOrder' in metadata: omedimorder = metadata['DimensionOrder'] omedimorder = ''.join( ax for ax in omedimorder if dimsizes[dimorder.index(ax)] > 1 ) if hiaxes not in omedimorder: raise OmeXmlError( f'metadata DimensionOrder does not match {axes!r}' ) # verify metadata Size values match shape for ax, size in zip(dimorder, dimsizes): if 
metadata.get(f'Size{ax}', size) != size: raise OmeXmlError( f'metadata Size{ax} does not match {shape!r}' ) dimsizes[dimorder.index('C')] //= samples if planecount != product(dimsizes[2:]): raise ValueError('shape does not match stored shape') plane_list = [] planeattributes = metadata.get('Plane', '') if planeattributes: cztorder = tuple(dimorder[2:].index(ax) for ax in 'CZT') for p in range(planecount): attributes = OmeXml._attributes( planeattributes, p, 'DeltaT', 'DeltaTUnit', 'ExposureTime', 'ExposureTimeUnit', 'PositionX', 'PositionXUnit', 'PositionY', 'PositionYUnit', 'PositionZ', 'PositionZUnit', ) unraveled = numpy.unravel_index(p, dimsizes[2:], order='F') c, z, t = (int(unraveled[i]) for i in cztorder) plane_list.append( f'' ) # TODO: if possible, verify c, z, t match planeattributes planes = ''.join(plane_list) channel_list = [] for c in range(sizec): lightpath = '' # TODO: use LightPath elements from metadata # 'AnnotationRef', # 'DichroicRef', # 'EmissionFilterRef', # 'ExcitationFilterRef' attributes = OmeXml._attributes( metadata.get('Channel', ''), c, 'Name', 'AcquisitionMode', 'Color', 'ContrastMethod', 'EmissionWavelength', 'EmissionWavelengthUnit', 'ExcitationWavelength', 'ExcitationWavelengthUnit', 'Fluor', 'IlluminationType', 'NDFilter', 'PinholeSize', 'PinholeSizeUnit', 'PockelCellSetting', ) channel_list.append( f'' f'{lightpath}' '' ) channels = ''.join(channel_list) # TODO: support more Image elements elements = OmeXml._elements(metadata, 'AcquisitionDate', 'Description') name = OmeXml._attribute(metadata, 'Name', default=f'Image{index}') attributes = OmeXml._attributes( metadata, None, 'SignificantBits', 'PhysicalSizeX', 'PhysicalSizeXUnit', 'PhysicalSizeY', 'PhysicalSizeYUnit', 'PhysicalSizeZ', 'PhysicalSizeZUnit', 'TimeIncrement', 'TimeIncrementUnit', ) if separate > 1 or contig > 1: interleaved = 'false' if separate > 1 else 'true' interleaved = f' Interleaved="{interleaved}"' else: interleaved = '' self.images.append( f'' f'{elements}' 
f'' f'{channels}' f'' f'{planes}' '' f'{annotationref}' '' ) self._ifd += planecount def tostring(self, *, declaration: bool = False) -> str: """Return OME-XML string. Parameters: declaration: Include XML declaration. """ # TODO: support other top-level elements elements = ''.join(self.elements) images = ''.join(self.images) annotations = ''.join(self.annotations) if annotations: annotations = ( f'{annotations}' ) if declaration: declaration_str = '' else: declaration_str = '' xml = self._xml.format( declaration=declaration_str, images=images, annotations=annotations, elements=elements, ) return xml def __repr__(self) -> str: return f'' def __str__(self) -> str: """Return OME-XML string.""" xml = self.tostring() try: from lxml import etree parser = etree.XMLParser(remove_blank_text=True) tree = etree.fromstring(xml, parser) xml = etree.tostring( tree, encoding='utf-8', pretty_print=True, xml_declaration=True ).decode() except ImportError: pass except Exception as exc: warnings.warn( f' {exc.__class__.__name__}: {exc}', UserWarning, ) return xml @staticmethod def _escape(value: object, /) -> str: """Return escaped string of value.""" if not isinstance(value, str): value = str(value) elif '&' in value or '>' in value or '<' in value: return value value = value.replace('&', '&') value = value.replace('>', '>') value = value.replace('<', '<') return value @staticmethod def _element( metadata: dict[str, Any], name: str, default: str | None = None ) -> str: """Return XML formatted element if name in metadata.""" value = metadata.get(name, default) if value is None: return '' return f'<{name}>{OmeXml._escape(value)}' @staticmethod def _elements(metadata: dict[str, Any], /, *names: str) -> str: """Return XML formatted elements.""" if not metadata: return '' elements = (OmeXml._element(metadata, name) for name in names) return ''.join(e for e in elements if e) @staticmethod def _attribute( metadata: dict[str, Any], name: str, /, index: int | None = None, default: Any = 
None, ) -> str: """Return XML formatted attribute if name in metadata.""" value = metadata.get(name, default) if value is None: return '' if index is not None: if isinstance(value, (list, tuple)): try: value = value[index] except IndexError as exc: raise IndexError( f'list index out of range for attribute {name!r}' ) from exc elif index > 0: raise TypeError( f'{type(value).__name__!r} is not a list or tuple' ) return f' {name}="{OmeXml._escape(value)}"' @staticmethod def _attributes( metadata: dict[str, Any], index_: int | None, /, *names: str, ) -> str: """Return XML formatted attributes.""" if not metadata: return '' if index_ is None: attributes = (OmeXml._attribute(metadata, name) for name in names) elif isinstance(metadata, (list, tuple)): metadata = metadata[index_] attributes = (OmeXml._attribute(metadata, name) for name in names) elif isinstance(metadata, dict): attributes = ( OmeXml._attribute(metadata, name, index_) for name in names ) return ''.join(a for a in attributes if a) @staticmethod def validate( omexml: str, /, omexsd: bytes | None = None, assert_: bool = True, *, _schema: list[Any] = [], # etree.XMLSchema ) -> bool | None: r"""Return if OME-XML is valid according to XMLSchema. Parameters: omexml: OME-XML string to validate. omexsd: Content of OME-XSD schema to validate against. By default, the 2016-06 OME XMLSchema is downloaded on first run. assert\_: Raise AssertionError if validation fails. _schema: Internal use. Raises: AssertionError: Validation failed and `assert\_` is *True*. 
""" from lxml import etree if not _schema: if omexsd is None: omexsd_path = os.path.join( os.path.dirname(__file__), 'ome.xsd' ) if os.path.exists(omexsd_path): with open(omexsd_path, 'rb') as fh: omexsd = fh.read() else: import urllib.request with urllib.request.urlopen( 'https://www.openmicroscopy.org/' 'Schemas/OME/2016-06/ome.xsd' ) as fh: omexsd = fh.read() if omexsd.startswith(b'', 1)[-1] try: _schema.append( etree.XMLSchema(etree.fromstring(omexsd.decode())) ) except Exception: # raise _schema.append(None) if _schema and _schema[0] is not None: if omexml.startswith('', 1)[-1] tree = etree.fromstring(omexml) if assert_: _schema[0].assert_(tree) return True return bool(_schema[0].validate(tree)) return None @final class CompressionCodec(collections.abc.Mapping): """Map :py:class:`COMPRESSION` value to encode or decode function. Parameters: encode: If *True*, return encode functions, else decode functions. """ _codecs: dict[int, Callable[..., Any]] _encode: bool def __init__(self, encode: bool) -> None: self._codecs = {1: identityfunc} self._encode = bool(encode) def __getitem__(self, key: int, /) -> Callable[..., Any]: if key in self._codecs: return self._codecs[key] codec: Callable[..., Any] try: # TODO: enable CCITTRLE decoder for future imagecodecs # if key == 2: # if self._encode: # codec = imagecodecs.ccittrle_encode # else: # codec = imagecodecs.ccittrle_decode if key == 5: if self._encode: codec = imagecodecs.lzw_encode else: codec = imagecodecs.lzw_decode elif key in {6, 7, 33007}: if self._encode: if key in {6, 33007}: raise NotImplementedError codec = imagecodecs.jpeg_encode else: codec = imagecodecs.jpeg_decode elif key in {8, 32946, 50013}: if ( hasattr(imagecodecs, 'DEFLATE') and imagecodecs.DEFLATE.available ): # imagecodecs built with deflate if self._encode: codec = imagecodecs.deflate_encode else: codec = imagecodecs.deflate_decode elif ( hasattr(imagecodecs, 'ZLIB') and imagecodecs.ZLIB.available ): if self._encode: codec = 
imagecodecs.zlib_encode else: codec = imagecodecs.zlib_decode else: # imagecodecs built without zlib try: from . import _imagecodecs except ImportError: import _imagecodecs # type: ignore if self._encode: codec = _imagecodecs.zlib_encode else: codec = _imagecodecs.zlib_decode elif key == 32773: if self._encode: codec = imagecodecs.packbits_encode else: codec = imagecodecs.packbits_decode elif key in {33003, 33004, 33005, 34712}: if self._encode: codec = imagecodecs.jpeg2k_encode else: codec = imagecodecs.jpeg2k_decode elif key == 34887: if self._encode: codec = imagecodecs.lerc_encode else: codec = imagecodecs.lerc_decode elif key == 34892: # DNG lossy if self._encode: codec = imagecodecs.jpeg8_encode else: codec = imagecodecs.jpeg8_decode elif key == 34925: if hasattr(imagecodecs, 'LZMA') and imagecodecs.LZMA.available: if self._encode: codec = imagecodecs.lzma_encode else: codec = imagecodecs.lzma_decode else: # imagecodecs built without lzma try: from . import _imagecodecs except ImportError: import _imagecodecs # type: ignore if self._encode: codec = _imagecodecs.lzma_encode else: codec = _imagecodecs.lzma_decode elif key == 34933: if self._encode: codec = imagecodecs.png_encode else: codec = imagecodecs.png_decode elif key in {34934, 22610}: if self._encode: codec = imagecodecs.jpegxr_encode else: codec = imagecodecs.jpegxr_decode elif key == 48124: if self._encode: codec = imagecodecs.jetraw_encode else: codec = imagecodecs.jetraw_decode elif key in {50000, 34926}: # 34926 deprecated if self._encode: codec = imagecodecs.zstd_encode else: codec = imagecodecs.zstd_decode elif key in {50001, 34927}: # 34927 deprecated if self._encode: codec = imagecodecs.webp_encode else: codec = imagecodecs.webp_decode elif key in {65000, 65001, 65002} and not self._encode: codec = imagecodecs.eer_decode elif key in {50002, 52546}: if self._encode: codec = imagecodecs.jpegxl_encode else: codec = imagecodecs.jpegxl_decode else: try: msg = f'{COMPRESSION(key)!r} not supported' 
except ValueError: msg = f'{key} is not a known COMPRESSION' raise KeyError(msg) except (AttributeError, ImportError) as exc: raise KeyError( f'{COMPRESSION(key)!r} ' "requires the 'imagecodecs' package" ) from exc except NotImplementedError as exc: raise KeyError(f'{COMPRESSION(key)!r} not implemented') from exc self._codecs[key] = codec return codec def __contains__(self, key: Any, /) -> bool: try: self[key] except KeyError: return False return True def __iter__(self) -> Iterator[int]: yield 1 # dummy def __len__(self) -> int: return 1 # dummy @final class PredictorCodec(collections.abc.Mapping): """Map :py:class:`PREDICTOR` value to encode or decode function. Parameters: encode: If *True*, return encode functions, else decode functions. """ _codecs: dict[int, Callable[..., Any]] _encode: bool def __init__(self, encode: bool) -> None: self._codecs = {1: identityfunc} self._encode = bool(encode) def __getitem__(self, key: int, /) -> Callable[..., Any]: if key in self._codecs: return self._codecs[key] codec: Callable[..., Any] try: if key == 2: if self._encode: codec = imagecodecs.delta_encode else: codec = imagecodecs.delta_decode elif key == 3: if self._encode: codec = imagecodecs.floatpred_encode else: codec = imagecodecs.floatpred_decode elif key == 34892: if self._encode: def codec(data, axis=-1, out=None): return imagecodecs.delta_encode( data, axis=axis, out=out, dist=2 ) else: def codec(data, axis=-1, out=None): return imagecodecs.delta_decode( data, axis=axis, out=out, dist=2 ) elif key == 34893: if self._encode: def codec(data, axis=-1, out=None): return imagecodecs.delta_encode( data, axis=axis, out=out, dist=4 ) else: def codec(data, axis=-1, out=None): return imagecodecs.delta_decode( data, axis=axis, out=out, dist=4 ) elif key == 34894: if self._encode: def codec(data, axis=-1, out=None): return imagecodecs.floatpred_encode( data, axis=axis, out=out, dist=2 ) else: def codec(data, axis=-1, out=None): return imagecodecs.floatpred_decode( data, 
axis=axis, out=out, dist=2 ) elif key == 34895: if self._encode: def codec(data, axis=-1, out=None): return imagecodecs.floatpred_encode( data, axis=axis, out=out, dist=4 ) else: def codec(data, axis=-1, out=None): return imagecodecs.floatpred_decode( data, axis=axis, out=out, dist=4 ) else: raise KeyError(f'{key} is not a known PREDICTOR') except AttributeError as exc: raise KeyError( f'{PREDICTOR(key)!r}' " requires the 'imagecodecs' package" ) from exc except NotImplementedError as exc: raise KeyError(f'{PREDICTOR(key)!r} not implemented') from exc self._codecs[key] = codec return codec def __contains__(self, key: Any, /) -> bool: try: self[key] except KeyError: return False return True def __iter__(self) -> Iterator[int]: yield 1 # dummy def __len__(self) -> int: return 1 # dummy class DATATYPE(enum.IntEnum): """TIFF tag data types.""" BYTE = 1 """8-bit unsigned integer.""" ASCII = 2 """8-bit byte with last byte null, containing 7-bit ASCII code.""" SHORT = 3 """16-bit unsigned integer.""" LONG = 4 """32-bit unsigned integer.""" RATIONAL = 5 """Two 32-bit unsigned integers, numerator and denominator of fraction.""" SBYTE = 6 """8-bit signed integer.""" UNDEFINED = 7 """8-bit byte that may contain anything.""" SSHORT = 8 """16-bit signed integer.""" SLONG = 9 """32-bit signed integer.""" SRATIONAL = 10 """Two 32-bit signed integers, numerator and denominator of fraction.""" FLOAT = 11 """Single precision (4-byte) IEEE format.""" DOUBLE = 12 """Double precision (8-byte) IEEE format.""" IFD = 13 """Unsigned 4 byte IFD offset.""" UNICODE = 14 COMPLEX = 15 LONG8 = 16 """Unsigned 8 byte integer (BigTIFF).""" SLONG8 = 17 """Signed 8 byte integer (BigTIFF).""" IFD8 = 18 """Unsigned 8 byte IFD offset (BigTIFF).""" class COMPRESSION(enum.IntEnum): """Values of Compression tag. Compression scheme used on image data. 
""" NONE = 1 """No compression (default).""" CCITTRLE = 2 # CCITT 1D CCITT_T4 = 3 # T4/Group 3 Fax CCITT_T6 = 4 # T6/Group 4 Fax LZW = 5 """Lempel-Ziv-Welch.""" OJPEG = 6 # old-style JPEG JPEG = 7 """New style JPEG.""" ADOBE_DEFLATE = 8 """Deflate, aka ZLIB.""" JBIG_BW = 9 # VC5 JBIG_COLOR = 10 JPEG_99 = 99 KODAK_262 = 262 JPEGXR_NDPI = 22610 """JPEG XR (Hammatsu NDPI).""" NEXT = 32766 SONY_ARW = 32767 PACKED_RAW = 32769 SAMSUNG_SRW = 32770 CCIRLEW = 32771 # Word-aligned 1D Huffman compression SAMSUNG_SRW2 = 32772 PACKBITS = 32773 """PackBits, aka Macintosh RLE.""" THUNDERSCAN = 32809 IT8CTPAD = 32895 IT8LW = 32896 IT8MP = 32897 IT8BL = 32898 PIXARFILM = 32908 PIXARLOG = 32909 DEFLATE = 32946 DCS = 32947 APERIO_JP2000_YCBC = 33003 # Matrox libraries """JPEG 2000 YCbCr (Leica Aperio).""" JPEG_2000_LOSSY = 33004 """Lossy JPEG 2000 (Bio-Formats).""" APERIO_JP2000_RGB = 33005 # Kakadu libraries """JPEG 2000 RGB (Leica Aperio).""" ALT_JPEG = 33007 """JPEG (Bio-Formats).""" # PANASONIC_RAW1 = 34316 # PANASONIC_RAW2 = 34826 # PANASONIC_RAW3 = 34828 # PANASONIC_RAW4 = 34830 JBIG = 34661 SGILOG = 34676 # LogLuv32 SGILOG24 = 34677 JPEG2000 = 34712 """JPEG 2000.""" NIKON_NEF = 34713 JBIG2 = 34715 MDI_BINARY = 34718 # Microsoft Document Imaging MDI_PROGRESSIVE = 34719 # Microsoft Document Imaging MDI_VECTOR = 34720 # Microsoft Document Imaging LERC = 34887 """ESRI Limited Error Raster Compression.""" JPEG_LOSSY = 34892 # DNG LZMA = 34925 """Lempel-Ziv-Markov chain Algorithm.""" ZSTD_DEPRECATED = 34926 WEBP_DEPRECATED = 34927 PNG = 34933 # Objective Pathology Services """Portable Network Graphics (Zoomable Image File format).""" JPEGXR = 34934 """JPEG XR (Zoomable Image File format).""" JETRAW = 48124 """Jetraw by Dotphoton.""" ZSTD = 50000 """Zstandard.""" WEBP = 50001 """WebP.""" JPEGXL = 50002 # GDAL """JPEG XL.""" PIXTIFF = 50013 """ZLIB (Atalasoft).""" JPEGXL_DNG = 52546 """JPEG XL (DNG).""" EER_V0 = 65000 # FIXED82 Thermo Fisher Scientific EER_V1 = 65001 # FIXED72 Thermo 
Fisher Scientific EER_V2 = 65002 # VARIABLE Thermo Fisher Scientific # KODAK_DCR = 65000 # PENTAX_PEF = 65535 def __bool__(self) -> bool: return self > 1 class PREDICTOR(enum.IntEnum): """Values of Predictor tag. A mathematical operator that is applied to the image data before compression. """ NONE = 1 """No prediction scheme used (default).""" HORIZONTAL = 2 """Horizontal differencing.""" FLOATINGPOINT = 3 """Floating-point horizontal differencing.""" HORIZONTALX2 = 34892 # DNG HORIZONTALX4 = 34893 FLOATINGPOINTX2 = 34894 FLOATINGPOINTX4 = 34895 def __bool__(self) -> bool: return self > 1 class PHOTOMETRIC(enum.IntEnum): """Values of PhotometricInterpretation tag. The color space of the image. """ MINISWHITE = 0 """For bilevel and grayscale images, 0 is imaged as white.""" MINISBLACK = 1 """For bilevel and grayscale images, 0 is imaged as black.""" RGB = 2 """Chroma components are Red, Green, Blue.""" PALETTE = 3 """Single chroma component is index into colormap.""" MASK = 4 SEPARATED = 5 """Chroma components are Cyan, Magenta, Yellow, and Key (black).""" YCBCR = 6 """Chroma components are Luma, blue-difference, and red-difference.""" CIELAB = 8 ICCLAB = 9 ITULAB = 10 CFA = 32803 """Color Filter Array.""" LOGL = 32844 LOGLUV = 32845 LINEAR_RAW = 34892 DEPTH_MAP = 51177 # DNG 1.5 SEMANTIC_MASK = 52527 # DNG 1.6 class FILETYPE(enum.IntFlag): """Values of NewSubfileType tag. A general indication of the kind of the image. 
""" UNDEFINED = 0 """Image is full-resolution (default).""" REDUCEDIMAGE = 1 """Image is reduced-resolution version of another image.""" PAGE = 2 """Image is single page of multi-page image.""" MASK = 4 """Image is transparency mask for another image.""" MACRO = 8 # Aperio SVS, or DNG Depth map """Image is MACRO image (SVS) or depth map for another image (DNG).""" ENHANCED = 16 # DNG """Image contains enhanced image (DNG).""" DNG = 65536 # 65537: Alternative, 65540: Semantic mask class OFILETYPE(enum.IntEnum): """Values of deprecated SubfileType tag.""" UNDEFINED = 0 IMAGE = 1 # full-resolution image REDUCEDIMAGE = 2 # reduced-resolution image PAGE = 3 # single page of multi-page image class FILLORDER(enum.IntEnum): """Values of FillOrder tag. The logical order of bits within a byte. """ MSB2LSB = 1 """Pixel values are stored in higher-order bits of byte (default).""" LSB2MSB = 2 """Pixels values are stored in lower-order bits of byte.""" class ORIENTATION(enum.IntEnum): """Values of Orientation tag. The orientation of the image with respect to the rows and columns. """ TOPLEFT = 1 # default TOPRIGHT = 2 BOTRIGHT = 3 BOTLEFT = 4 LEFTTOP = 5 RIGHTTOP = 6 RIGHTBOT = 7 LEFTBOT = 8 class PLANARCONFIG(enum.IntEnum): """Values of PlanarConfiguration tag. Specifies how components of each pixel are stored. """ CONTIG = 1 """Chunky, component values are stored contiguously (default).""" SEPARATE = 2 """Planar, component values are stored in separate planes.""" class RESUNIT(enum.IntEnum): """Values of ResolutionUnit tag. The unit of measurement for XResolution and YResolution. """ NONE = 1 """No absolute unit of measurement.""" INCH = 2 """Inch (default).""" CENTIMETER = 3 """Centimeter.""" MILLIMETER = 4 """Millimeter (DNG).""" MICROMETER = 5 """Micrometer (DNG).""" def __bool__(self) -> bool: return self > 1 class EXTRASAMPLE(enum.IntEnum): """Values of ExtraSamples tag. Interpretation of extra components in a pixel. 
""" UNSPECIFIED = 0 """Unspecified data.""" ASSOCALPHA = 1 """Associated alpha data with premultiplied color.""" UNASSALPHA = 2 """Unassociated alpha data.""" class SAMPLEFORMAT(enum.IntEnum): """Values of SampleFormat tag. Data type of samples in a pixel. """ UINT = 1 """Unsigned integer.""" INT = 2 """Signed integer.""" IEEEFP = 3 """IEEE floating-point""" VOID = 4 """Undefined.""" COMPLEXINT = 5 """Complex integer.""" COMPLEXIEEEFP = 6 """Complex floating-point.""" class CHUNKMODE(enum.IntEnum): """ZarrStore chunk modes. Specifies how to chunk data in Zarr stores. """ STRILE = 0 """Chunk is strip or tile.""" PLANE = 1 """Chunk is image plane.""" PAGE = 2 """Chunk is image in page.""" FILE = 3 """Chunk is image in file.""" # class THRESHOLD(enum.IntEnum): # BILEVEL = 1 # HALFTONE = 2 # ERRORDIFFUSE = 3 # # class GRAYRESPONSEUNIT(enum.IntEnum): # _10S = 1 # _100S = 2 # _1000S = 3 # _10000S = 4 # _100000S = 5 # # class COLORRESPONSEUNIT(enum.IntEnum): # _10S = 1 # _100S = 2 # _1000S = 3 # _10000S = 4 # _100000S = 5 # # class GROUP4OPT(enum.IntEnum): # UNCOMPRESSED = 2 class _TIFF: """Delay-loaded constants, accessible via :py:attr:`TIFF` instance.""" @cached_property def CLASSIC_LE(self) -> TiffFormat: """32-bit little-endian TIFF format.""" return TiffFormat( version=42, byteorder='<', offsetsize=4, offsetformat=' TiffFormat: """32-bit big-endian TIFF format.""" return TiffFormat( version=42, byteorder='>', offsetsize=4, offsetformat='>I', tagnosize=2, tagnoformat='>H', tagsize=12, tagformat1='>HH', tagformat2='>I4s', tagoffsetthreshold=4, ) @cached_property def BIG_LE(self) -> TiffFormat: """64-bit little-endian TIFF format.""" return TiffFormat( version=43, byteorder='<', offsetsize=8, offsetformat=' TiffFormat: """64-bit big-endian TIFF format.""" return TiffFormat( version=43, byteorder='>', offsetsize=8, offsetformat='>Q', tagnosize=8, tagnoformat='>Q', tagsize=20, tagformat1='>HH', tagformat2='>Q8s', tagoffsetthreshold=8, ) @cached_property def NDPI_LE(self) 
-> TiffFormat: """32-bit little-endian TIFF format with 64-bit offsets.""" return TiffFormat( version=42, byteorder='<', offsetsize=8, # NDPI uses 8 bytes IFD and tag offsets offsetformat=' TiffTagRegistry: """Registry of TIFF tag codes and names from TIFF6, TIFF/EP, EXIF.""" # TODO: divide into baseline, exif, private, ... tags return TiffTagRegistry( ( (11, 'ProcessingSoftware'), (254, 'NewSubfileType'), (255, 'SubfileType'), (256, 'ImageWidth'), (257, 'ImageLength'), (258, 'BitsPerSample'), (259, 'Compression'), (262, 'PhotometricInterpretation'), (263, 'Thresholding'), (264, 'CellWidth'), (265, 'CellLength'), (266, 'FillOrder'), (269, 'DocumentName'), (270, 'ImageDescription'), (271, 'Make'), (272, 'Model'), (273, 'StripOffsets'), (274, 'Orientation'), (277, 'SamplesPerPixel'), (278, 'RowsPerStrip'), (279, 'StripByteCounts'), (280, 'MinSampleValue'), (281, 'MaxSampleValue'), (282, 'XResolution'), (283, 'YResolution'), (284, 'PlanarConfiguration'), (285, 'PageName'), (286, 'XPosition'), (287, 'YPosition'), (288, 'FreeOffsets'), (289, 'FreeByteCounts'), (290, 'GrayResponseUnit'), (291, 'GrayResponseCurve'), (292, 'T4Options'), (293, 'T6Options'), (296, 'ResolutionUnit'), (297, 'PageNumber'), (300, 'ColorResponseUnit'), (301, 'TransferFunction'), (305, 'Software'), (306, 'DateTime'), (315, 'Artist'), (316, 'HostComputer'), (317, 'Predictor'), (318, 'WhitePoint'), (319, 'PrimaryChromaticities'), (320, 'ColorMap'), (321, 'HalftoneHints'), (322, 'TileWidth'), (323, 'TileLength'), (324, 'TileOffsets'), (325, 'TileByteCounts'), (326, 'BadFaxLines'), (327, 'CleanFaxData'), (328, 'ConsecutiveBadFaxLines'), (330, 'SubIFDs'), (332, 'InkSet'), (333, 'InkNames'), (334, 'NumberOfInks'), (336, 'DotRange'), (337, 'TargetPrinter'), (338, 'ExtraSamples'), (339, 'SampleFormat'), (340, 'SMinSampleValue'), (341, 'SMaxSampleValue'), (342, 'TransferRange'), (343, 'ClipPath'), (344, 'XClipPathUnits'), (345, 'YClipPathUnits'), (346, 'Indexed'), (347, 'JPEGTables'), (351, 'OPIProxy'), 
(400, 'GlobalParametersIFD'), (401, 'ProfileType'), (402, 'FaxProfile'), (403, 'CodingMethods'), (404, 'VersionYear'), (405, 'ModeNumber'), (433, 'Decode'), (434, 'DefaultImageColor'), (435, 'T82Options'), (437, 'JPEGTables'), # 347 (512, 'JPEGProc'), (513, 'JPEGInterchangeFormat'), (514, 'JPEGInterchangeFormatLength'), (515, 'JPEGRestartInterval'), (517, 'JPEGLosslessPredictors'), (518, 'JPEGPointTransforms'), (519, 'JPEGQTables'), (520, 'JPEGDCTables'), (521, 'JPEGACTables'), (529, 'YCbCrCoefficients'), (530, 'YCbCrSubSampling'), (531, 'YCbCrPositioning'), (532, 'ReferenceBlackWhite'), (559, 'StripRowCounts'), (700, 'XMP'), # XMLPacket (769, 'GDIGamma'), # GDI+ (770, 'ICCProfileDescriptor'), # GDI+ (771, 'SRGBRenderingIntent'), # GDI+ (800, 'ImageTitle'), # GDI+ (907, 'SiffCompress'), # https://github.com/MaimonLab/SiffPy (999, 'USPTO_Miscellaneous'), (4864, 'AndorId'), # TODO, Andor Technology 4864 - 5030 (4869, 'AndorTemperature'), (4876, 'AndorExposureTime'), (4878, 'AndorKineticCycleTime'), (4879, 'AndorAccumulations'), (4881, 'AndorAcquisitionCycleTime'), (4882, 'AndorReadoutTime'), (4884, 'AndorPhotonCounting'), (4885, 'AndorEmDacLevel'), (4890, 'AndorFrames'), (4896, 'AndorHorizontalFlip'), (4897, 'AndorVerticalFlip'), (4898, 'AndorClockwise'), (4899, 'AndorCounterClockwise'), (4904, 'AndorVerticalClockVoltage'), (4905, 'AndorVerticalShiftSpeed'), (4907, 'AndorPreAmpSetting'), (4908, 'AndorCameraSerial'), (4911, 'AndorActualTemperature'), (4912, 'AndorBaselineClamp'), (4913, 'AndorPrescans'), (4914, 'AndorModel'), (4915, 'AndorChipSizeX'), (4916, 'AndorChipSizeY'), (4944, 'AndorBaselineOffset'), (4966, 'AndorSoftwareVersion'), (18246, 'Rating'), (18247, 'XP_DIP_XML'), (18248, 'StitchInfo'), (18249, 'RatingPercent'), (20481, 'ResolutionXUnit'), # GDI+ (20482, 'ResolutionYUnit'), # GDI+ (20483, 'ResolutionXLengthUnit'), # GDI+ (20484, 'ResolutionYLengthUnit'), # GDI+ (20485, 'PrintFlags'), # GDI+ (20486, 'PrintFlagsVersion'), # GDI+ (20487, 
'PrintFlagsCrop'), # GDI+ (20488, 'PrintFlagsBleedWidth'), # GDI+ (20489, 'PrintFlagsBleedWidthScale'), # GDI+ (20490, 'HalftoneLPI'), # GDI+ (20491, 'HalftoneLPIUnit'), # GDI+ (20492, 'HalftoneDegree'), # GDI+ (20493, 'HalftoneShape'), # GDI+ (20494, 'HalftoneMisc'), # GDI+ (20495, 'HalftoneScreen'), # GDI+ (20496, 'JPEGQuality'), # GDI+ (20497, 'GridSize'), # GDI+ (20498, 'ThumbnailFormat'), # GDI+ (20499, 'ThumbnailWidth'), # GDI+ (20500, 'ThumbnailHeight'), # GDI+ (20501, 'ThumbnailColorDepth'), # GDI+ (20502, 'ThumbnailPlanes'), # GDI+ (20503, 'ThumbnailRawBytes'), # GDI+ (20504, 'ThumbnailSize'), # GDI+ (20505, 'ThumbnailCompressedSize'), # GDI+ (20506, 'ColorTransferFunction'), # GDI+ (20507, 'ThumbnailData'), (20512, 'ThumbnailImageWidth'), # GDI+ (20513, 'ThumbnailImageHeight'), # GDI+ (20514, 'ThumbnailBitsPerSample'), # GDI+ (20515, 'ThumbnailCompression'), (20516, 'ThumbnailPhotometricInterp'), # GDI+ (20517, 'ThumbnailImageDescription'), # GDI+ (20518, 'ThumbnailEquipMake'), # GDI+ (20519, 'ThumbnailEquipModel'), # GDI+ (20520, 'ThumbnailStripOffsets'), # GDI+ (20521, 'ThumbnailOrientation'), # GDI+ (20522, 'ThumbnailSamplesPerPixel'), # GDI+ (20523, 'ThumbnailRowsPerStrip'), # GDI+ (20524, 'ThumbnailStripBytesCount'), # GDI+ (20525, 'ThumbnailResolutionX'), (20526, 'ThumbnailResolutionY'), (20527, 'ThumbnailPlanarConfig'), # GDI+ (20528, 'ThumbnailResolutionUnit'), (20529, 'ThumbnailTransferFunction'), (20530, 'ThumbnailSoftwareUsed'), # GDI+ (20531, 'ThumbnailDateTime'), # GDI+ (20532, 'ThumbnailArtist'), # GDI+ (20533, 'ThumbnailWhitePoint'), # GDI+ (20534, 'ThumbnailPrimaryChromaticities'), # GDI+ (20535, 'ThumbnailYCbCrCoefficients'), # GDI+ (20536, 'ThumbnailYCbCrSubsampling'), # GDI+ (20537, 'ThumbnailYCbCrPositioning'), (20538, 'ThumbnailRefBlackWhite'), # GDI+ (20539, 'ThumbnailCopyRight'), # GDI+ (20545, 'InteroperabilityIndex'), (20546, 'InteroperabilityVersion'), (20624, 'LuminanceTable'), (20625, 'ChrominanceTable'), (20736, 'FrameDelay'), 
# GDI+ (20737, 'LoopCount'), # GDI+ (20738, 'GlobalPalette'), # GDI+ (20739, 'IndexBackground'), # GDI+ (20740, 'IndexTransparent'), # GDI+ (20752, 'PixelUnit'), # GDI+ (20753, 'PixelPerUnitX'), # GDI+ (20754, 'PixelPerUnitY'), # GDI+ (20755, 'PaletteHistogram'), # GDI+ (28672, 'SonyRawFileType'), # Sony ARW (28722, 'VignettingCorrParams'), # Sony ARW (28725, 'ChromaticAberrationCorrParams'), # Sony ARW (28727, 'DistortionCorrParams'), # Sony ARW # Private tags >= 32768 (32781, 'ImageID'), (32931, 'WangTag1'), (32932, 'WangAnnotation'), (32933, 'WangTag3'), (32934, 'WangTag4'), (32953, 'ImageReferencePoints'), (32954, 'RegionXformTackPoint'), (32955, 'WarpQuadrilateral'), (32956, 'AffineTransformMat'), (32995, 'Matteing'), (32996, 'DataType'), # use SampleFormat (32997, 'ImageDepth'), (32998, 'TileDepth'), (33300, 'ImageFullWidth'), (33301, 'ImageFullLength'), (33302, 'TextureFormat'), (33303, 'TextureWrapModes'), (33304, 'FieldOfViewCotangent'), (33305, 'MatrixWorldToScreen'), (33306, 'MatrixWorldToCamera'), (33405, 'Model2'), (33421, 'CFARepeatPatternDim'), (33422, 'CFAPattern'), (33423, 'BatteryLevel'), (33424, 'KodakIFD'), (33434, 'ExposureTime'), (33437, 'FNumber'), (33432, 'Copyright'), (33445, 'MDFileTag'), (33446, 'MDScalePixel'), (33447, 'MDColorTable'), (33448, 'MDLabName'), (33449, 'MDSampleInfo'), (33450, 'MDPrepDate'), (33451, 'MDPrepTime'), (33452, 'MDFileUnits'), (33465, 'NiffRotation'), # NIFF (33466, 'NiffNavyCompression'), # NIFF (33467, 'NiffTileIndex'), # NIFF (33471, 'OlympusINI'), (33550, 'ModelPixelScaleTag'), (33560, 'OlympusSIS'), # see also 33471 and 34853 (33589, 'AdventScale'), (33590, 'AdventRevision'), (33628, 'UIC1tag'), # Metamorph Universal Imaging Corp STK (33629, 'UIC2tag'), (33630, 'UIC3tag'), (33631, 'UIC4tag'), (33723, 'IPTCNAA'), (33858, 'ExtendedTagsOffset'), # DEFF points IFD with tags (33918, 'IntergraphPacketData'), # INGRPacketDataTag (33919, 'IntergraphFlagRegisters'), # INGRFlagRegisters (33920, 'IntergraphMatrixTag'), 
# IrasBTransformationMatrix (33921, 'INGRReserved'), (33922, 'ModelTiepointTag'), (33923, 'LeicaMagic'), (34016, 'Site'), # 34016..34032 ANSI IT8 TIFF/IT (34017, 'ColorSequence'), (34018, 'IT8Header'), (34019, 'RasterPadding'), (34020, 'BitsPerRunLength'), (34021, 'BitsPerExtendedRunLength'), (34022, 'ColorTable'), (34023, 'ImageColorIndicator'), (34024, 'BackgroundColorIndicator'), (34025, 'ImageColorValue'), (34026, 'BackgroundColorValue'), (34027, 'PixelIntensityRange'), (34028, 'TransparencyIndicator'), (34029, 'ColorCharacterization'), (34030, 'HCUsage'), (34031, 'TrapIndicator'), (34032, 'CMYKEquivalent'), (34118, 'CZ_SEM'), # Zeiss SEM (34152, 'AFCP_IPTC'), (34232, 'PixelMagicJBIGOptions'), # EXIF, also TI FrameCount (34263, 'JPLCartoIFD'), (34122, 'IPLAB'), # number of images (34264, 'ModelTransformationTag'), (34306, 'WB_GRGBLevels'), # Leaf MOS (34310, 'LeafData'), (34361, 'MM_Header'), (34362, 'MM_Stamp'), (34363, 'MM_Unknown'), (34377, 'ImageResources'), # Photoshop (34386, 'MM_UserBlock'), (34412, 'CZ_LSMINFO'), (34665, 'ExifTag'), (34675, 'InterColorProfile'), # ICCProfile (34680, 'FEI_SFEG'), # (34682, 'FEI_HELIOS'), # (34683, 'FEI_TITAN'), # (34687, 'FXExtensions'), (34688, 'MultiProfiles'), (34689, 'SharedData'), (34690, 'T88Options'), (34710, 'MarCCD'), # offset to MarCCD header (34732, 'ImageLayer'), (34735, 'GeoKeyDirectoryTag'), (34736, 'GeoDoubleParamsTag'), (34737, 'GeoAsciiParamsTag'), (34750, 'JBIGOptions'), (34821, 'PIXTIFF'), # ? 
Pixel Translations Inc (34850, 'ExposureProgram'), (34852, 'SpectralSensitivity'), (34853, 'GPSTag'), # GPSIFD also OlympusSIS2 (34853, 'OlympusSIS2'), (34855, 'ISOSpeedRatings'), (34855, 'PhotographicSensitivity'), (34856, 'OECF'), # optoelectric conversion factor (34857, 'Interlace'), # TIFF/EP (34858, 'TimeZoneOffset'), # TIFF/EP (34859, 'SelfTimerMode'), # TIFF/EP (34864, 'SensitivityType'), (34865, 'StandardOutputSensitivity'), (34866, 'RecommendedExposureIndex'), (34867, 'ISOSpeed'), (34868, 'ISOSpeedLatitudeyyy'), (34869, 'ISOSpeedLatitudezzz'), (34908, 'HylaFAXFaxRecvParams'), (34909, 'HylaFAXFaxSubAddress'), (34910, 'HylaFAXFaxRecvTime'), (34911, 'FaxDcs'), (34929, 'FedexEDR'), (34954, 'LeafSubIFD'), (34959, 'Aphelion1'), (34960, 'Aphelion2'), (34961, 'AphelionInternal'), # ADCIS (36864, 'ExifVersion'), (36867, 'DateTimeOriginal'), (36868, 'DateTimeDigitized'), (36873, 'GooglePlusUploadCode'), (36880, 'OffsetTime'), (36881, 'OffsetTimeOriginal'), (36882, 'OffsetTimeDigitized'), # TODO, Pilatus/CHESS/TV6 36864..37120 conflicting with Exif (36864, 'TVX_Unknown'), (36865, 'TVX_NumExposure'), (36866, 'TVX_NumBackground'), (36867, 'TVX_ExposureTime'), (36868, 'TVX_BackgroundTime'), (36870, 'TVX_Unknown'), (36873, 'TVX_SubBpp'), (36874, 'TVX_SubWide'), (36875, 'TVX_SubHigh'), (36876, 'TVX_BlackLevel'), (36877, 'TVX_DarkCurrent'), (36878, 'TVX_ReadNoise'), (36879, 'TVX_DarkCurrentNoise'), (36880, 'TVX_BeamMonitor'), (37120, 'TVX_UserVariables'), # A/D values (37121, 'ComponentsConfiguration'), (37122, 'CompressedBitsPerPixel'), (37377, 'ShutterSpeedValue'), (37378, 'ApertureValue'), (37379, 'BrightnessValue'), (37380, 'ExposureBiasValue'), (37381, 'MaxApertureValue'), (37382, 'SubjectDistance'), (37383, 'MeteringMode'), (37384, 'LightSource'), (37385, 'Flash'), (37386, 'FocalLength'), (37387, 'FlashEnergy'), # TIFF/EP (37388, 'SpatialFrequencyResponse'), # TIFF/EP (37389, 'Noise'), # TIFF/EP (37390, 'FocalPlaneXResolution'), # TIFF/EP (37391, 
'FocalPlaneYResolution'), # TIFF/EP (37392, 'FocalPlaneResolutionUnit'), # TIFF/EP (37393, 'ImageNumber'), # TIFF/EP (37394, 'SecurityClassification'), # TIFF/EP (37395, 'ImageHistory'), # TIFF/EP (37396, 'SubjectLocation'), # TIFF/EP (37397, 'ExposureIndex'), # TIFF/EP (37398, 'TIFFEPStandardID'), # TIFF/EP (37399, 'SensingMethod'), # TIFF/EP (37434, 'CIP3DataFile'), (37435, 'CIP3Sheet'), (37436, 'CIP3Side'), (37439, 'StoNits'), (37500, 'MakerNote'), (37510, 'UserComment'), (37520, 'SubsecTime'), (37521, 'SubsecTimeOriginal'), (37522, 'SubsecTimeDigitized'), (37679, 'MODIText'), # Microsoft Office Document Imaging (37680, 'MODIOLEPropertySetStorage'), (37681, 'MODIPositioning'), (37701, 'AgilentBinary'), # private structure (37702, 'AgilentString'), # file description (37706, 'TVIPS'), # offset to TemData structure (37707, 'TVIPS1'), (37708, 'TVIPS2'), # same TemData structure as undefined (37724, 'ImageSourceData'), # Photoshop (37888, 'Temperature'), (37889, 'Humidity'), (37890, 'Pressure'), (37891, 'WaterDepth'), (37892, 'Acceleration'), (37893, 'CameraElevationAngle'), (40000, 'XPos'), # Janelia (40001, 'YPos'), (40002, 'ZPos'), (40001, 'MC_IpWinScal'), # Media Cybernetics (40001, 'RecipName'), # MS FAX (40002, 'RecipNumber'), (40003, 'SenderName'), (40004, 'Routing'), (40005, 'CallerId'), (40006, 'TSID'), (40007, 'CSID'), (40008, 'FaxTime'), (40100, 'MC_IdOld'), (40106, 'MC_Unknown'), (40965, 'InteroperabilityTag'), # InteropOffset (40091, 'XPTitle'), (40092, 'XPComment'), (40093, 'XPAuthor'), (40094, 'XPKeywords'), (40095, 'XPSubject'), (40960, 'FlashpixVersion'), (40961, 'ColorSpace'), (40962, 'PixelXDimension'), (40963, 'PixelYDimension'), (40964, 'RelatedSoundFile'), (40976, 'SamsungRawPointersOffset'), (40977, 'SamsungRawPointersLength'), (41217, 'SamsungRawByteOrder'), (41218, 'SamsungRawUnknown'), (41483, 'FlashEnergy'), (41484, 'SpatialFrequencyResponse'), (41485, 'Noise'), # 37389 (41486, 'FocalPlaneXResolution'), # 37390 (41487, 
'FocalPlaneYResolution'), # 37391 (41488, 'FocalPlaneResolutionUnit'), # 37392 (41489, 'ImageNumber'), # 37393 (41490, 'SecurityClassification'), # 37394 (41491, 'ImageHistory'), # 37395 (41492, 'SubjectLocation'), # 37395 (41493, 'ExposureIndex '), # 37397 (41494, 'TIFF-EPStandardID'), (41495, 'SensingMethod'), # 37399 (41728, 'FileSource'), (41729, 'SceneType'), (41730, 'CFAPattern'), # 33422 (41985, 'CustomRendered'), (41986, 'ExposureMode'), (41987, 'WhiteBalance'), (41988, 'DigitalZoomRatio'), (41989, 'FocalLengthIn35mmFilm'), (41990, 'SceneCaptureType'), (41991, 'GainControl'), (41992, 'Contrast'), (41993, 'Saturation'), (41994, 'Sharpness'), (41995, 'DeviceSettingDescription'), (41996, 'SubjectDistanceRange'), (42016, 'ImageUniqueID'), (42032, 'CameraOwnerName'), (42033, 'BodySerialNumber'), (42034, 'LensSpecification'), (42035, 'LensMake'), (42036, 'LensModel'), (42037, 'LensSerialNumber'), (42080, 'CompositeImage'), (42081, 'SourceImageNumberCompositeImage'), (42082, 'SourceExposureTimesCompositeImage'), (42112, 'GDAL_METADATA'), (42113, 'GDAL_NODATA'), (42240, 'Gamma'), (43314, 'NIHImageHeader'), (44992, 'ExpandSoftware'), (44993, 'ExpandLens'), (44994, 'ExpandFilm'), (44995, 'ExpandFilterLens'), (44996, 'ExpandScanner'), (44997, 'ExpandFlashLamp'), (48129, 'PixelFormat'), # HDP and WDP (48130, 'Transformation'), (48131, 'Uncompressed'), (48132, 'ImageType'), (48256, 'ImageWidth'), # 256 (48257, 'ImageHeight'), (48258, 'WidthResolution'), (48259, 'HeightResolution'), (48320, 'ImageOffset'), (48321, 'ImageByteCount'), (48322, 'AlphaOffset'), (48323, 'AlphaByteCount'), (48324, 'ImageDataDiscard'), (48325, 'AlphaDataDiscard'), (50003, 'KodakAPP3'), (50215, 'OceScanjobDescription'), (50216, 'OceApplicationSelector'), (50217, 'OceIdentificationNumber'), (50218, 'OceImageLogicCharacteristics'), (50255, 'Annotations'), (50288, 'MC_Id'), # Media Cybernetics (50289, 'MC_XYPosition'), (50290, 'MC_ZPosition'), (50291, 'MC_XYCalibration'), (50292, 
'MC_LensCharacteristics'), (50293, 'MC_ChannelName'), (50294, 'MC_ExcitationWavelength'), (50295, 'MC_TimeStamp'), (50296, 'MC_FrameProperties'), (50341, 'PrintImageMatching'), (50495, 'PCO_RAW'), # TODO, PCO CamWare (50547, 'OriginalFileName'), (50560, 'USPTO_OriginalContentType'), # US Patent Office (50561, 'USPTO_RotationCode'), (50648, 'CR2Unknown1'), (50649, 'CR2Unknown2'), (50656, 'CR2CFAPattern'), (50674, 'LercParameters'), # ESGI 50674 .. 50677 (50706, 'DNGVersion'), # DNG 50706 .. 51114 (50707, 'DNGBackwardVersion'), (50708, 'UniqueCameraModel'), (50709, 'LocalizedCameraModel'), (50710, 'CFAPlaneColor'), (50711, 'CFALayout'), (50712, 'LinearizationTable'), (50713, 'BlackLevelRepeatDim'), (50714, 'BlackLevel'), (50715, 'BlackLevelDeltaH'), (50716, 'BlackLevelDeltaV'), (50717, 'WhiteLevel'), (50718, 'DefaultScale'), (50719, 'DefaultCropOrigin'), (50720, 'DefaultCropSize'), (50721, 'ColorMatrix1'), (50722, 'ColorMatrix2'), (50723, 'CameraCalibration1'), (50724, 'CameraCalibration2'), (50725, 'ReductionMatrix1'), (50726, 'ReductionMatrix2'), (50727, 'AnalogBalance'), (50728, 'AsShotNeutral'), (50729, 'AsShotWhiteXY'), (50730, 'BaselineExposure'), (50731, 'BaselineNoise'), (50732, 'BaselineSharpness'), (50733, 'BayerGreenSplit'), (50734, 'LinearResponseLimit'), (50735, 'CameraSerialNumber'), (50736, 'LensInfo'), (50737, 'ChromaBlurRadius'), (50738, 'AntiAliasStrength'), (50739, 'ShadowScale'), (50740, 'DNGPrivateData'), (50741, 'MakerNoteSafety'), (50752, 'RawImageSegmentation'), (50778, 'CalibrationIlluminant1'), (50779, 'CalibrationIlluminant2'), (50780, 'BestQualityScale'), (50781, 'RawDataUniqueID'), (50784, 'AliasLayerMetadata'), (50827, 'OriginalRawFileName'), (50828, 'OriginalRawFileData'), (50829, 'ActiveArea'), (50830, 'MaskedAreas'), (50831, 'AsShotICCProfile'), (50832, 'AsShotPreProfileMatrix'), (50833, 'CurrentICCProfile'), (50834, 'CurrentPreProfileMatrix'), (50838, 'IJMetadataByteCounts'), (50839, 'IJMetadata'), (50844, 'RPCCoefficientTag'), 
(50879, 'ColorimetricReference'), (50885, 'SRawType'), (50898, 'PanasonicTitle'), (50899, 'PanasonicTitle2'), (50908, 'RSID'), # DGIWG (50909, 'GEO_METADATA'), # DGIWG XML (50931, 'CameraCalibrationSignature'), (50932, 'ProfileCalibrationSignature'), (50933, 'ProfileIFD'), # EXTRACAMERAPROFILES (50934, 'AsShotProfileName'), (50935, 'NoiseReductionApplied'), (50936, 'ProfileName'), (50937, 'ProfileHueSatMapDims'), (50938, 'ProfileHueSatMapData1'), (50939, 'ProfileHueSatMapData2'), (50940, 'ProfileToneCurve'), (50941, 'ProfileEmbedPolicy'), (50942, 'ProfileCopyright'), (50964, 'ForwardMatrix1'), (50965, 'ForwardMatrix2'), (50966, 'PreviewApplicationName'), (50967, 'PreviewApplicationVersion'), (50968, 'PreviewSettingsName'), (50969, 'PreviewSettingsDigest'), (50970, 'PreviewColorSpace'), (50971, 'PreviewDateTime'), (50972, 'RawImageDigest'), (50973, 'OriginalRawFileDigest'), (50974, 'SubTileBlockSize'), (50975, 'RowInterleaveFactor'), (50981, 'ProfileLookTableDims'), (50982, 'ProfileLookTableData'), (51008, 'OpcodeList1'), (51009, 'OpcodeList2'), (51022, 'OpcodeList3'), (51023, 'FibicsXML'), # (51041, 'NoiseProfile'), (51043, 'TimeCodes'), (51044, 'FrameRate'), (51058, 'TStop'), (51081, 'ReelName'), (51089, 'OriginalDefaultFinalSize'), (51090, 'OriginalBestQualitySize'), (51091, 'OriginalDefaultCropSize'), (51105, 'CameraLabel'), (51107, 'ProfileHueSatMapEncoding'), (51108, 'ProfileLookTableEncoding'), (51109, 'BaselineExposureOffset'), (51110, 'DefaultBlackRender'), (51111, 'NewRawImageDigest'), (51112, 'RawToPreviewGain'), (51113, 'CacheBlob'), (51114, 'CacheVersion'), (51123, 'MicroManagerMetadata'), (51125, 'DefaultUserCrop'), (51159, 'ZIFmetadata'), # Objective Pathology Services (51160, 'ZIFannotations'), # Objective Pathology Services (51177, 'DepthFormat'), (51178, 'DepthNear'), (51179, 'DepthFar'), (51180, 'DepthUnits'), (51181, 'DepthMeasureType'), (51182, 'EnhanceParams'), (52525, 'ProfileGainTableMap'), # DNG 1.6 (52526, 'SemanticName'), # DNG 1.6 (52528, 
@cached_property
def TAG_READERS(
    self,
) -> dict[int, Callable[[FileHandle, ByteOrder, int, int, int], Any]]:
    # lookup table: tag code -> function used to import the tag's value
    readers = {
        301: read_colormap,
        320: read_colormap,
        # 700: read_bytes,  # read_utf8,
        # 34377: read_bytes,
        33723: read_bytes,
        # 34675: read_bytes,
        33628: read_uic1tag,  # Universal Imaging Corp STK
        33629: read_uic2tag,
        33630: read_uic3tag,
        33631: read_uic4tag,
        34118: read_cz_sem,  # Carl Zeiss SEM
        34361: read_mm_header,  # Olympus FluoView
        34362: read_mm_stamp,
        34363: read_numpy,  # MM_Unknown
        34386: read_numpy,  # MM_UserBlock
        34412: read_cz_lsminfo,  # Carl Zeiss LSM
        34680: read_fei_metadata,  # S-FEG
        34682: read_fei_metadata,  # Helios NanoLab
        37706: read_tvips_header,  # TVIPS EMMENU
        37724: read_bytes,  # ImageSourceData
        33923: read_bytes,  # read_leica_magic
        43314: read_nih_image_header,
        # 40001: read_bytes,
        40100: read_bytes,
        50288: read_bytes,
        50296: read_bytes,
        50839: read_bytes,
        51123: read_json,
        33471: read_sis_ini,
        33560: read_sis,
        34665: read_exif_ifd,
        34853: read_gps_ifd,  # conflicts with OlympusSIS
        40965: read_interoperability_ifd,
        65426: read_numpy,  # NDPI McuStarts
        65432: read_numpy,  # NDPI McuStartsHighBytes
        65439: read_numpy,  # NDPI unknown
        65459: read_bytes,  # NDPI bytes, not string
    }
    return readers


@cached_property
def TAG_LOAD(self) -> frozenset[int]:
    # codes of tags whose values are not delay loaded
    eager_codes = {
        258,  # BitsPerSample
        270,  # ImageDescription
        273,  # StripOffsets
        277,  # SamplesPerPixel
        279,  # StripByteCounts
        282,  # XResolution
        283,  # YResolution
        # 301,  # TransferFunction
        305,  # Software
        # 306,  # DateTime
        # 320,  # ColorMap
        324,  # TileOffsets
        325,  # TileByteCounts
        330,  # SubIFDs
        338,  # ExtraSamples
        339,  # SampleFormat
        347,  # JPEGTables
        513,  # JPEGInterchangeFormat
        514,  # JPEGInterchangeFormatLength
        530,  # YCbCrSubSampling
        33628,  # UIC1tag
        42113,  # GDAL_NODATA
        50838,  # IJMetadataByteCounts
        50839,  # IJMetadata
    }
    return frozenset(eager_codes)


@cached_property
def TAG_FILTERED(self) -> frozenset[int]:
    # codes of tags that are filtered from extratags in
    # :py:meth:`TiffWriter.write`
    filtered_codes = {
        256,  # ImageWidth
        257,  # ImageLength
        258,  # BitsPerSample
        259,  # Compression
        262,  # PhotometricInterpretation
        266,  # FillOrder
        273,  # StripOffsets
        277,  # SamplesPerPixel
        278,  # RowsPerStrip
        279,  # StripByteCounts
        284,  # PlanarConfiguration
        317,  # Predictor
        322,  # TileWidth
        323,  # TileLength
        324,  # TileOffsets
        325,  # TileByteCounts
        330,  # SubIFDs
        338,  # ExtraSamples
        339,  # SampleFormat
        400,  # GlobalParametersIFD
        32997,  # ImageDepth
        32998,  # TileDepth
        34665,  # ExifTag
        34853,  # GPSTag
        40965,  # InteroperabilityTag
    }
    return frozenset(filtered_codes)


@cached_property
def TAG_TUPLE(self) -> frozenset[int]:
    # codes of tags whose values must be stored as tuples
    return frozenset(
        {273, 279, 282, 283, 324, 325, 330, 338}
        | {513, 514, 530, 531, 34736, 50838}
    )


@cached_property
def TAG_ATTRIBUTES(self) -> dict[int, str]:
    # lookup table: tag code -> name of the TiffPage attribute
    attributes = {
        254: 'subfiletype',
        256: 'imagewidth',
        257: 'imagelength',
        # 258: 'bitspersample',  # set manually
        259: 'compression',
        262: 'photometric',
        266: 'fillorder',
        270: 'description',
        277: 'samplesperpixel',
        278: 'rowsperstrip',
        284: 'planarconfig',
        # 301: 'transferfunction',  # delay load
        305: 'software',
        # 320: 'colormap',  # delay load
        317: 'predictor',
        322: 'tilewidth',
        323: 'tilelength',
        330: 'subifds',
        338: 'extrasamples',
        # 339: 'sampleformat',  # set manually
        347: 'jpegtables',
        530: 'subsampling',
        32997: 'imagedepth',
        32998: 'tiledepth',
    }
    return attributes


@cached_property
def TAG_ENUM(self) -> dict[int, type[enum.Enum]]:
    # lookup table: tag code -> Enum class describing the tag's values
    enums = {
        254: FILETYPE,
        255: OFILETYPE,
        259: COMPRESSION,
        262: PHOTOMETRIC,
        # 263: THRESHOLD,
        266: FILLORDER,
        274: ORIENTATION,
        284: PLANARCONFIG,
        # 290: GRAYRESPONSEUNIT,
        # 292: GROUP3OPT
        # 293: GROUP4OPT
        296: RESUNIT,
        # 300: COLORRESPONSEUNIT,
        317: PREDICTOR,
        338: EXTRASAMPLE,
        339: SAMPLEFORMAT,
        # 512: JPEGPROC
        # 531: YCBCRPOSITION
    }
    return enums


@cached_property
def EXIF_TAGS(self) -> TiffTagRegistry:
    """EXIF tag registry: private Photoshop Camera RAW tags plus TIFF.TAGS.

    The registry is created from the Camera RAW entries and then updated
    with ``TIFF.TAGS``.

    """
    # 65000 - 65112  Photoshop Camera RAW EXIF tags
    camera_raw = (
        (65000, 'OwnerName'),
        (65001, 'SerialNumber'),
        (65002, 'Lens'),
        (65100, 'RawFile'),
        (65101, 'Converter'),
        (65102, 'WhiteBalance'),
        (65105, 'Exposure'),
        (65106, 'Shadows'),
        (65107, 'Brightness'),
        (65108, 'Contrast'),
        (65109, 'Saturation'),
        (65110, 'Sharpness'),
        (65111, 'Smoothness'),
        (65112, 'MoireFilter'),
    )
    registry = TiffTagRegistry(camera_raw)
    registry.update(TIFF.TAGS)
    return registry


@cached_property
def NDPI_TAGS(self) -> TiffTagRegistry:
    """Registry of private Hamamatsu NDPI TIFF tags (65420-65458)."""
    # TODO: obtain specification
    # entries whose name equals their code have no known name in this table
    ndpi = (
        (65324, 'OffsetHighBytes'),
        (65325, 'ByteCountHighBytes'),
        (65420, 'FileFormat'),
        (65421, 'Magnification'),  # SourceLens
        (65422, 'XOffsetFromSlideCenter'),
        (65423, 'YOffsetFromSlideCenter'),
        (65424, 'ZOffsetFromSlideCenter'),  # FocalPlane
        (65425, 'TissueIndex'),
        (65426, 'McuStarts'),
        (65427, 'SlideLabel'),
        (65428, 'AuthCode'),  # ?
        (65429, '65429'),
        (65430, '65430'),
        (65431, '65431'),
        (65432, 'McuStartsHighBytes'),
        (65433, '65433'),
        (65434, 'Fluorescence'),  # FilterSetName, Channel
        (65435, 'ExposureRatio'),
        (65436, 'RedMultiplier'),
        (65437, 'GreenMultiplier'),
        (65438, 'BlueMultiplier'),
        (65439, 'FocusPoints'),
        (65440, 'FocusPointRegions'),
        (65441, 'CaptureMode'),
        (65442, 'ScannerSerialNumber'),
        (65443, '65443'),
        (65444, 'JpegQuality'),
        (65445, 'RefocusInterval'),
        (65446, 'FocusOffset'),
        (65447, 'BlankLines'),
        (65448, 'FirmwareVersion'),
        (65449, 'Comments'),  # PropertyMap, CalibrationInfo
        (65450, 'LabelObscured'),
        (65451, 'Wavelength'),
        (65452, '65452'),
        (65453, 'LampAge'),
        (65454, 'ExposureTime'),
        (65455, 'FocusTime'),
        (65456, 'ScanTime'),
        (65457, 'WriteTime'),
        (65458, 'FullyAutoFocus'),
        (65500, 'DefaultGamma'),
    )
    return TiffTagRegistry(ndpi)
@cached_property
def IOP_TAGS(self) -> TiffTagRegistry:
    """Registry of the tags of the Interoperability IFD."""
    interoperability = (
        (1, 'InteroperabilityIndex'),
        (2, 'InteroperabilityVersion'),
        (4096, 'RelatedImageFileFormat'),
        (4097, 'RelatedImageWidth'),
        (4098, 'RelatedImageLength'),
    )
    return TiffTagRegistry(interoperability)


@cached_property
def PHOTOMETRIC_SAMPLES(self) -> dict[int, int]:
    """Number of photometric samples for each :py:class:`PHOTOMETRIC`."""
    samples = {
        0: 1,  # MINISWHITE
        1: 1,  # MINISBLACK
        2: 3,  # RGB
        3: 1,  # PALETTE
        4: 1,  # MASK
        5: 4,  # SEPARATED
        6: 3,  # YCBCR
        8: 3,  # CIELAB
        9: 3,  # ICCLAB
        10: 3,  # ITULAB
        32803: 1,  # CFA
        32844: 1,  # LOGL ?
        32845: 3,  # LOGLUV
        34892: 3,  # LINEAR_RAW ?
        51177: 1,  # DEPTH_MAP ?
        52527: 1,  # SEMANTIC_MASK ?
    }
    return samples


@cached_property
def DATA_FORMATS(self) -> dict[int, str]:
    """Python struct format for each :py:class:`DATATYPE` code."""
    formats = {
        1: '1B',
        2: '1s',
        3: '1H',
        4: '1I',
        5: '2I',
        6: '1b',
        7: '1B',
        8: '1h',
        9: '1i',
        10: '2i',
        11: '1f',
        12: '1d',
        13: '1I',
        # 14: '',
        # 15: '',
        16: '1Q',
        17: '1q',
        18: '1Q',
    }
    return formats


@cached_property
def DATA_DTYPES(self) -> dict[str, int]:
    """:py:class:`DATATYPE` code for each NumPy dtype."""
    datatypes = {
        'B': 1,
        's': 2,
        'H': 3,
        'I': 4,
        '2I': 5,
        'b': 6,
        'h': 8,
        'i': 9,
        '2i': 10,
        'f': 11,
        'd': 12,
        'Q': 16,
        'q': 17,
    }
    return datatypes


@cached_property
def SAMPLE_DTYPES(self) -> dict[tuple[int, int | tuple[int, ...]], str]:
    """NumPy dtype for each (:py:class:`SAMPLEFORMAT`, BitsPerSample)."""
    dtypes = {}
    # UINT (1) first, then VOID (4), which is treated as UINT
    for sampleformat in (1, 4):
        dtypes[(sampleformat, 1)] = '?'  # bitmap
        for bits in range(2, 33):
            # smallest unsigned integer type that holds `bits` bits
            if bits <= 8:
                char = 'B'
            elif bits <= 16:
                char = 'H'
            else:
                char = 'I'
            dtypes[(sampleformat, bits)] = char
        dtypes[(sampleformat, 64)] = 'Q'
    dtypes.update(
        {
            # INT
            (2, 8): 'b',
            (2, 16): 'h',
            (2, 32): 'i',
            (2, 64): 'q',
            # IEEEFP
            (3, 16): 'e',
            (3, 24): 'f',  # float24 bit not supported by numpy
            (3, 32): 'f',
            (3, 64): 'd',
            # COMPLEXIEEEFP
            (6, 64): 'F',
            (6, 128): 'D',
            # RGB565
            (1, (5, 6, 5)): 'B',
            # COMPLEXINT : not supported by numpy
            (5, 16): 'E',
            (5, 32): 'F',
            (5, 64): 'D',
        }
    )
    return dtypes


@cached_property
def PREDICTORS(self) -> Mapping[int, Callable[..., Any]]:
    """Encode function for each :py:class:`PREDICTOR` value."""
    encoders = PredictorCodec(True)
    return encoders


@cached_property
def UNPREDICTORS(self) -> Mapping[int, Callable[..., Any]]:
    """Decode function for each :py:class:`PREDICTOR` value."""
    decoders = PredictorCodec(False)
    return decoders


@cached_property
def COMPRESSORS(self) -> Mapping[int, Callable[..., Any]]:
    """Compress function for each :py:class:`COMPRESSION` value."""
    compressors = CompressionCodec(True)
    return compressors


@cached_property
def DECOMPRESSORS(self) -> Mapping[int, Callable[..., Any]]:
    """Decompress function for each :py:class:`COMPRESSION` value."""
    decompressors = CompressionCodec(False)
    return decompressors
@cached_property
def AXES_NAMES(self) -> dict[str, str]:
    """Dimension name for each axes character code.

    - **X : width** (image width)
    - **Y : height** (image length)
    - **Z : depth** (image depth)
    - **S : sample** (color space and extra samples)
    - **I : sequence** (generic sequence of images, frames, planes, pages)
    - **T : time** (time series)
    - **C : channel** (acquisition path or emission wavelength)
    - **A : angle** (OME)
    - **P : phase** (OME. In LSM, **P** maps to **position**)
    - **R : tile** (OME. Region, position, or mosaic)
    - **H : lifetime** (OME. Histogram)
    - **E : lambda** (OME. Excitation wavelength)
    - **Q : other** (OME)
    - **L : exposure** (FluoView)
    - **V : event** (FluoView)
    - **M : mosaic** (LSM 6)
    - **J : column** (NDTiff)
    - **K : row** (NDTiff)

    Dimension codes and names are not universally standardized. The
    mapping mainly follows TIFF, OME-TIFF, ImageJ, LSM, and FluoView
    conventions.

    """
    names = {
        'X': 'width',
        'Y': 'height',
        'Z': 'depth',
        'S': 'sample',
        'I': 'sequence',
        # 'F': 'file',
        'T': 'time',
        'C': 'channel',
        'A': 'angle',
        'P': 'phase',
        'R': 'tile',
        'H': 'lifetime',
        'E': 'lambda',
        'L': 'exposure',
        'V': 'event',
        'M': 'mosaic',
        'Q': 'other',
        'J': 'column',
        'K': 'row',
    }
    return names


@cached_property
def AXES_CODES(self) -> dict[str, str]:
    """Axes character code for each dimension name.

    Inverse of :py:attr:`AXES_NAMES`, extended by the names ``'z'`` and
    ``'position'``.

    """
    names = TIFF.AXES_NAMES
    # swap keys and values, keeping the order of AXES_NAMES
    codes = dict(zip(names.values(), names.keys()))
    codes.update({'z': 'Z', 'position': 'R'})  # NDTiff
    return codes


@cached_property
def GEO_KEYS(self) -> type[enum.IntEnum]:
    """:py:class:`geodb.GeoKeys`, or an empty IntEnum if not importable."""
    try:
        from .geodb import GeoKeys as geokeys
    except ImportError:
        pass
    else:
        return geokeys

    # geodb is not available: fall back to an enumeration without members
    class GeoKeys(enum.IntEnum):
        pass

    return GeoKeys


@cached_property
def GEO_CODES(self) -> dict[int, type[enum.IntEnum]]:
    """Map :py:class:`geodb.GeoKeys` to GeoTIFF codes.

    The mapping is empty if ``geodb`` cannot be imported.

    """
    try:
        from .geodb import GEO_CODES as geo_codes
    except ImportError:
        return {}
    return geo_codes


@cached_property
def PAGE_FLAGS(self) -> set[str]:
    # names of the TiffPage 'is_\*' attributes, without the 'is_' prefix
    # and without the flags listed below
    excluded = {
        'reduced',
        'mask',
        'final',
        'memmappable',
        'contiguous',
        'tiled',
        'subsampled',
        'jfif',
    }
    flags = {name[3:] for name in dir(TiffPage) if name.startswith('is_')}
    return flags - excluded


@cached_property
def FILE_FLAGS(self) -> set[str]:
    # names of the TiffFile 'is_\*' attributes, without the 'is_' prefix
    # and without 'bigtiff' and 'appendable', united with PAGE_FLAGS
    excluded = {'bigtiff', 'appendable'}
    flags = {name[3:] for name in dir(TiffFile) if name.startswith('is_')}
    # exclusion applies to the TiffFile names only, hence before the union
    return (flags - excluded).union(TIFF.PAGE_FLAGS)


@property
def FILE_PATTERNS(self) -> dict[str, str]:
    # predefined FileSequence patterns
    # 'axes' is a verbose, case-insensitive regular expression: one
    # required (axis label, 1-4 digits) pair followed by up to six more
    patterns = {
        'axes': r"""(?ix)
            # matches Olympus OIF and Leica TIFF series
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))?
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))?
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))?
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))?
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))?
            _?(?:(q|l|p|a|c|t|x|y|z|ch|tp)(\d{1,4}))?
            """
    }
    return patterns


@property
def FILE_EXTENSIONS(self) -> tuple[str, ...]:
    """File name extensions of known TIFF files."""
    return (
        'tif', 'tiff', 'ome.tif', 'lsm', 'stk', 'qpi', 'pcoraw',
        'qptiff', 'ptiff', 'ptif', 'gel', 'seq', 'svs', 'scn',
        'zif', 'ndpi', 'bif', 'tf8', 'tf2', 'btf', 'eer',
    )  # fmt: skip


@property
def FILEOPEN_FILTER(self) -> list[tuple[str, str]]:
    # (description, pattern) pairs for use in Windows File Open box:
    # one pair per known extension, followed by a match-all entry
    filters = []
    for ext in TIFF.FILE_EXTENSIONS:
        filters.append((f'{ext.upper()} files', f'*.{ext}'))
    filters.append(('allfiles', '*'))
    return filters
@property
def CZ_LSMINFO_READERS(
    self,
) -> dict[str, Callable[[FileHandle], Any] | None]:
    # functions that import CZ_LSMINFO sub-records, keyed by sub-record
    # name; None marks a sub-record without import function
    # TODO: read more CZ_LSMINFO sub-records
    readers = {
        'ScanInformation': read_lsm_scaninfo,
        'TimeStamps': read_lsm_timestamps,
        'EventList': read_lsm_eventlist,
        'ChannelColors': read_lsm_channelcolors,
        'Positions': read_lsm_positions,
        'TilePositions': read_lsm_positions,
        'VectorOverlay': None,
        'InputLut': read_lsm_lookuptable,
        'OutputLut': read_lsm_lookuptable,
        'TimeIntervall': None,
        'ChannelDataTypes': read_lsm_channeldatatypes,
        'KsData': None,
        'Roi': None,
        'BleachRoi': None,
        'NextRecording': None,  # read with TiffFile(fh, offset=)
        'MeanOfRoisOverlay': None,
        'TopoIsolineOverlay': None,
        'TopoProfileOverlay': None,
        'ChannelWavelength': read_lsm_channelwavelength,
        'SphereCorrection': None,
        'ChannelFactors': None,
        'UnmixParameters': None,
        'AcquisitionParameters': None,
        'Characteristics': None,
    }
    return readers


@property
def CZ_LSMINFO_SCANTYPE(self) -> dict[int, str]:
    # dimension order for each value of CZ_LSMINFO.ScanType
    orders = {
        0: 'XYZCT',  # 'Stack' normal x-y-z-scan
        1: 'XYZCT',  # 'Z-Scan' x-z-plane Y=1
        2: 'XYZCT',  # 'Line'
        3: 'XYTCZ',  # 'Time Series Plane' time series x-y  XYCTZ ? Z=1
        4: 'XYZTC',  # 'Time Series z-Scan' time series x-z
        5: 'XYTCZ',  # 'Time Series Mean-of-ROIs'
        6: 'XYZTC',  # 'Time Series Stack' time series x-y-z
        7: 'XYCTZ',  # Spline Scan
        8: 'XYCZT',  # Spline Plane x-z
        9: 'XYTCZ',  # Time Series Spline Plane x-z
        10: 'XYZCT',  # 'Time Series Point' point mode
    }
    return orders


@property
def CZ_LSMINFO_DIMENSIONS(self) -> dict[str, str]:
    # name of the CZ_LSMINFO attribute for each dimension code
    attributes = {
        'X': 'DimensionX',
        'Y': 'DimensionY',
        'Z': 'DimensionZ',
        'C': 'DimensionChannels',
        'T': 'DimensionTime',
        'P': 'DimensionP',
        'M': 'DimensionM',
    }
    return attributes


@property
def CZ_LSMINFO_DATATYPES(self) -> dict[int, str]:
    # textual description of the values of CZ_LSMINFO.DataType
    descriptions = {
        0: 'varying data types',
        1: '8 bit unsigned integer',
        2: '12 bit unsigned integer',
        5: '32 bit float',
    }
    return descriptions


@property
def CZ_LSMINFO_TYPEOFDATA(self) -> dict[int, str]:
    # textual description of the values of CZ_LSMINFO.TypeOfData
    descriptions = {
        0: 'Original scan data',
        1: 'Calculated data',
        2: '3D reconstruction',
        3: 'Topography height map',
    }
    return descriptions


@property
def CZ_LSMINFO_SCANINFO_ARRAYS(self) -> dict[int, str]:
    # plural names keyed by 32-bit code
    # NOTE(review): presumably the array-type entries of the LSM scan
    # information; confirm against the code that consumes this table
    arrays = {
        0x20000000: 'Tracks',
        0x30000000: 'Lasers',
        0x60000000: 'DetectionChannels',
        0x80000000: 'IlluminationChannels',
        0xA0000000: 'BeamSplitters',
        0xC0000000: 'DataChannels',
        0x11000000: 'Timers',
        0x13000000: 'Markers',
    }
    return arrays


@property
def CZ_LSMINFO_SCANINFO_STRUCTS(self) -> dict[int, str]:
    # singular names keyed by 32-bit code
    # NOTE(review): presumably the struct-type entries of the LSM scan
    # information; confirm against the code that consumes this table
    structs = {
        # 0x10000000: 'Recording',
        0x40000000: 'Track',
        0x50000000: 'Laser',
        0x70000000: 'DetectionChannel',
        0x90000000: 'IlluminationChannel',
        0xB0000000: 'BeamSplitter',
        0xD0000000: 'DataChannel',
        0x12000000: 'Timer',
        0x14000000: 'Marker',
    }
    return structs
0x1000000D: 'ImagesWidth', 0x1000000E: 'ImagesHeight', 0x1000000F: 'ImagesNumberPlanes', 0x10000010: 'ImagesNumberStacks', 0x10000011: 'ImagesNumberChannels', 0x10000012: 'LinscanXySize', 0x10000013: 'ScanDirection', 0x10000014: 'TimeSeries', 0x10000015: 'OriginalScanData', 0x10000016: 'ZoomX', 0x10000017: 'ZoomY', 0x10000018: 'ZoomZ', 0x10000019: 'Sample0X', 0x1000001A: 'Sample0Y', 0x1000001B: 'Sample0Z', 0x1000001C: 'SampleSpacing', 0x1000001D: 'LineSpacing', 0x1000001E: 'PlaneSpacing', 0x1000001F: 'PlaneWidth', 0x10000020: 'PlaneHeight', 0x10000021: 'VolumeDepth', 0x10000023: 'Nutation', 0x10000034: 'Rotation', 0x10000035: 'Precession', 0x10000036: 'Sample0time', 0x10000037: 'StartScanTriggerIn', 0x10000038: 'StartScanTriggerOut', 0x10000039: 'StartScanEvent', 0x10000040: 'StartScanTime', 0x10000041: 'StopScanTriggerIn', 0x10000042: 'StopScanTriggerOut', 0x10000043: 'StopScanEvent', 0x10000044: 'StopScanTime', 0x10000045: 'UseRois', 0x10000046: 'UseReducedMemoryRois', 0x10000047: 'User', 0x10000048: 'UseBcCorrection', 0x10000049: 'PositionBcCorrection1', 0x10000050: 'PositionBcCorrection2', 0x10000051: 'InterpolationY', 0x10000052: 'CameraBinning', 0x10000053: 'CameraSupersampling', 0x10000054: 'CameraFrameWidth', 0x10000055: 'CameraFrameHeight', 0x10000056: 'CameraOffsetX', 0x10000057: 'CameraOffsetY', 0x10000059: 'RtBinning', 0x1000005A: 'RtFrameWidth', 0x1000005B: 'RtFrameHeight', 0x1000005C: 'RtRegionWidth', 0x1000005D: 'RtRegionHeight', 0x1000005E: 'RtOffsetX', 0x1000005F: 'RtOffsetY', 0x10000060: 'RtZoom', 0x10000061: 'RtLinePeriod', 0x10000062: 'Prescan', 0x10000063: 'ScanDirectionZ', # Track 0x40000001: 'MultiplexType', # 0 After Line; 1 After Frame 0x40000002: 'MultiplexOrder', 0x40000003: 'SamplingMode', # 0 Sample; 1 Line Avg; 2 Frame Avg 0x40000004: 'SamplingMethod', # 1 Mean; 2 Sum 0x40000005: 'SamplingNumber', 0x40000006: 'Acquire', 0x40000007: 'SampleObservationTime', 0x4000000B: 'TimeBetweenStacks', 0x4000000C: 'Name', 0x4000000D: 
'Collimator1Name', 0x4000000E: 'Collimator1Position', 0x4000000F: 'Collimator2Name', 0x40000010: 'Collimator2Position', 0x40000011: 'IsBleachTrack', 0x40000012: 'IsBleachAfterScanNumber', 0x40000013: 'BleachScanNumber', 0x40000014: 'TriggerIn', 0x40000015: 'TriggerOut', 0x40000016: 'IsRatioTrack', 0x40000017: 'BleachCount', 0x40000018: 'SpiCenterWavelength', 0x40000019: 'PixelTime', 0x40000021: 'CondensorFrontlens', 0x40000023: 'FieldStopValue', 0x40000024: 'IdCondensorAperture', 0x40000025: 'CondensorAperture', 0x40000026: 'IdCondensorRevolver', 0x40000027: 'CondensorFilter', 0x40000028: 'IdTransmissionFilter1', 0x40000029: 'IdTransmission1', 0x40000030: 'IdTransmissionFilter2', 0x40000031: 'IdTransmission2', 0x40000032: 'RepeatBleach', 0x40000033: 'EnableSpotBleachPos', 0x40000034: 'SpotBleachPosx', 0x40000035: 'SpotBleachPosy', 0x40000036: 'SpotBleachPosz', 0x40000037: 'IdTubelens', 0x40000038: 'IdTubelensPosition', 0x40000039: 'TransmittedLight', 0x4000003A: 'ReflectedLight', 0x4000003B: 'SimultanGrabAndBleach', 0x4000003C: 'BleachPixelTime', # Laser 0x50000001: 'Name', 0x50000002: 'Acquire', 0x50000003: 'Power', # DetectionChannel 0x70000001: 'IntegrationMode', 0x70000002: 'SpecialMode', 0x70000003: 'DetectorGainFirst', 0x70000004: 'DetectorGainLast', 0x70000005: 'AmplifierGainFirst', 0x70000006: 'AmplifierGainLast', 0x70000007: 'AmplifierOffsFirst', 0x70000008: 'AmplifierOffsLast', 0x70000009: 'PinholeDiameter', 0x7000000A: 'CountingTrigger', 0x7000000B: 'Acquire', 0x7000000C: 'PointDetectorName', 0x7000000D: 'AmplifierName', 0x7000000E: 'PinholeName', 0x7000000F: 'FilterSetName', 0x70000010: 'FilterName', 0x70000013: 'IntegratorName', 0x70000014: 'ChannelName', 0x70000015: 'DetectorGainBc1', 0x70000016: 'DetectorGainBc2', 0x70000017: 'AmplifierGainBc1', 0x70000018: 'AmplifierGainBc2', 0x70000019: 'AmplifierOffsetBc1', 0x70000020: 'AmplifierOffsetBc2', 0x70000021: 'SpectralScanChannels', 0x70000022: 'SpiWavelengthStart', 0x70000023: 'SpiWavelengthStop', 
@cached_property
def CZ_LSM_LUTTYPE(self):  # TODO: type this
    """Return enumeration of LSM lookup table types."""

    class CZ_LSM_LUTTYPE(enum.IntEnum):
        # members are numbered consecutively from zero, in this order
        NORMAL, ORIGINAL, RAMP, POLYLINE, SPLINE, GAMMA = range(6)

    return CZ_LSM_LUTTYPE
@property
def TVIPS_HEADER_V1(self) -> list[tuple[str, str]]:
    """Return fields of TVIPS TemData structure from EMMENU Help file."""
    # field names in structure order; all fields share the 'i4' type
    # string except 'CommentV1', which uses 'S80'
    names = (
        'Version',
        'CommentV1',
        'HighTension',
        'SphericalAberration',
        'IlluminationAperture',
        'Magnification',
        'PostMagnification',
        'FocalLength',
        'Defocus',
        'Astigmatism',
        'AstigmatismDirection',
        'BiprismVoltage',
        'SpecimenTiltAngle',
        'SpecimenTiltDirection',
        'IlluminationTiltDirection',
        'IlluminationTiltAngle',
        'ImageMode',
        'EnergySpread',
        'ChromaticAberration',
        'ShutterType',
        'DefocusSpread',
        'CcdNumber',
        'CcdSize',
        'OffsetXV1',
        'OffsetYV1',
        'PhysicalPixelSize',
        'Binning',
        'ReadoutSpeed',
        'GainV1',
        'SensitivityV1',
        'ExposureTimeV1',
        'FlatCorrected',
        'DeadPxCorrected',
        'ImageMean',
        'ImageStd',
        'DisplacementX',
        'DisplacementY',
        'DateV1',
        'TimeV1',
        'ImageMin',
        'ImageMax',
        'ImageStatisticsQuality',
    )
    return [
        (name, 'S80' if name == 'CommentV1' else 'i4') for name in names
    ]
@property
def MM_HEADER(self) -> list[tuple[Any, ...]]:
    """Return fields of Olympus FluoView MM_Header structure."""
    # sub-structure used by both the 'Dimensions' and 'GrayChannel' fields
    dimension = [
        ('Name', 'S16'),
        ('Size', 'i4'),
        ('Origin', 'f8'),
        ('Resolution', 'f8'),
        ('Unit', 'S64'),
    ]
    header: list[tuple[Any, ...]] = [
        ('HeaderFlag', 'i2'),
        ('ImageType', 'u1'),
        ('ImageName', 'S257'),
        ('OffsetData', 'u4'),
        ('PaletteSize', 'i4'),
        ('OffsetPalette0', 'u4'),
        ('OffsetPalette1', 'u4'),
        ('CommentSize', 'i4'),
        ('OffsetComment', 'u4'),
    ]
    # ten dimension records
    header.append(('Dimensions', dimension, 10))
    header += [
        ('OffsetPosition', 'u4'),
        ('MapType', 'i2'),
        ('MapMin', 'f8'),
        ('MapMax', 'f8'),
        ('MinValue', 'f8'),
        ('MaxValue', 'f8'),
        ('OffsetMap', 'u4'),
        ('Gamma', 'f8'),
        ('Offset', 'f8'),
    ]
    # a single dimension record
    header.append(('GrayChannel', dimension))
    header += [
        ('OffsetThumbnail', 'u4'),
        ('VoiceField', 'i4'),
        ('OffsetVoiceField', 'u4'),
    ]
    return header
('ThreshStateGreen', int), ('ThreshStateBlue', int), ('ThreshStateLo', int), ('ThreshStateHi', int), ('Zoom', int), ('CreateTime', julian_datetime), ('LastSavedTime', julian_datetime), ('currentBuffer', int), ('grayFit', None), ('grayPointCount', None), ('grayX', Fraction), ('grayY', Fraction), ('grayMin', Fraction), ('grayMax', Fraction), ('grayUnitName', str), ('StandardLUT', int), ('wavelength', int), ('StagePosition', '(%i,2,2)u4'), # N xy positions as fract ('CameraChipOffset', '(%i,2,2)u4'), # N xy offsets as fract ('OverlayMask', None), ('OverlayCompress', None), ('Overlay', None), ('SpecialOverlayMask', None), ('SpecialOverlayCompress', None), ('SpecialOverlay', None), ('ImageProperty', read_uic_image_property), ('StageLabel', '%ip'), # N str ('AutoScaleLoInfo', Fraction), ('AutoScaleHiInfo', Fraction), ('AbsoluteZ', '(%i,2)u4'), # N fractions ('AbsoluteZValid', '(%i,)u4'), # N long ('Gamma', 'I'), # 'I' uses offset ('GammaRed', 'I'), ('GammaGreen', 'I'), ('GammaBlue', 'I'), ('CameraBin', '2I'), ('NewLUT', int), ('ImagePropertyEx', None), ('PlaneProperty', int), ('UserLutTable', '(256,3)u1'), ('RedAutoScaleInfo', int), ('RedAutoScaleLoInfo', Fraction), ('RedAutoScaleHiInfo', Fraction), ('RedMinScaleInfo', int), ('RedMaxScaleInfo', int), ('GreenAutoScaleInfo', int), ('GreenAutoScaleLoInfo', Fraction), ('GreenAutoScaleHiInfo', Fraction), ('GreenMinScaleInfo', int), ('GreenMaxScaleInfo', int), ('BlueAutoScaleInfo', int), ('BlueAutoScaleLoInfo', Fraction), ('BlueAutoScaleHiInfo', Fraction), ('BlueMinScaleInfo', int), ('BlueMaxScaleInfo', int), # ('OverlayPlaneColor', read_uic_overlay_plane_color), ] @property def PILATUS_HEADER(self) -> dict[str, Any]: # PILATUS CBF Header Specification, Version 1.4 # map key to [value_indices], type return { 'Detector': ([slice(1, None)], str), 'Pixel_size': ([1, 4], float), 'Silicon': ([3], float), 'Exposure_time': ([1], float), 'Exposure_period': ([1], float), 'Tau': ([1], float), 'Count_cutoff': ([1], int), 
@cached_property
def MAXIOWORKERS(self) -> int:
    """Default maximum number of I/O threads for reading file sequences.

    If the ``TIFFFILE_NUM_IOTHREADS`` environment variable is set, its
    integer value is used (at least 1). Otherwise the value is the number
    of CPU cores plus 4, limited to 32.

    """
    requested = os.environ.get('TIFFFILE_NUM_IOTHREADS')
    if requested is not None:
        return max(1, int(requested))

    ncpus: int | None
    try:
        # not available on all platforms
        ncpus = len(os.sched_getaffinity(0))  # type: ignore
    except AttributeError:
        ncpus = os.cpu_count()
    # 5 corresponds to one core plus 4 when the core count is unknown
    return 5 if ncpus is None else min(32, ncpus + 4)
def read_tags(
    fh: FileHandle,
    /,
    byteorder: ByteOrder,
    offsetsize: int,
    tagnames: TiffTagRegistry,
    *,
    maxifds: int | None = None,
    customtags: (
        dict[int, Callable[[FileHandle, ByteOrder, int, int, int], Any]] | None
    ) = None,
) -> list[dict[str, Any]]:
    """Read tags from chain of IFDs.

    Parameters:
        fh:
            Binary file handle to read from.
            The file handle position must be at a valid IFD header.
        byteorder:
            Byte order of TIFF file.
        offsetsize:
            Size of offsets in TIFF file (8 for BigTIFF, else 4).
        tagnames:
            Map of tag codes to names.
            For example, :py:class:`_TIFF.GPS_TAGS` or
            :py:class:`_TIFF.IOP_TAGS`.
            Codes not found in the map are named by their decimal string.
        maxifds:
            Maximum number of IFDs to read.
            By default, read the whole IFD chain.
        customtags:
            Mapping of tag codes to functions reading special tag value from
            file. The functions are called with
            ``(fh, byteorder, dtype, count, offsetsize)`` after the file
            position is set to the tag's value offset.

    Returns:
        One dictionary per IFD read, mapping tag names to tag values.

    Raises:
        ValueError: `offsetsize` is not 4 or 8.

    Notes:
        This implementation does not support 64-bit NDPI files.

        A corrupted or suspiciously long tag list (more than 4096 tags) is
        logged and ends reading; the IFDs read so far are returned.
        Tags with unknown data types or invalid value offsets are logged
        and skipped.

    """
    code: int
    dtype: int
    count: int
    valuebytes: bytes
    valueoffset: int

    # struct formats and sizes of the tag count, the tag entries, and the
    # offset to the next IFD depend on the offset size
    if offsetsize == 4:
        offsetformat = byteorder + 'I'
        tagnosize = 2
        tagnoformat = byteorder + 'H'
        tagsize = 12
        tagformat1 = byteorder + 'HH'
        tagformat2 = byteorder + 'I4s'
    elif offsetsize == 8:
        offsetformat = byteorder + 'Q'
        tagnosize = 8
        tagnoformat = byteorder + 'Q'
        tagsize = 20
        tagformat1 = byteorder + 'HH'
        tagformat2 = byteorder + 'Q8s'
    else:
        raise ValueError('invalid offset size')

    if customtags is None:
        customtags = {}
    if maxifds is None:
        maxifds = 2**32

    result: list[dict[str, Any]] = []
    unpack = struct.unpack
    offset = fh.tell()
    while len(result) < maxifds:
        # loop over IFDs
        try:
            tagno = unpack(tagnoformat, fh.read(tagnosize))[0]
            if tagno > 4096:
                raise TiffFileError(f'suspicious number of tags {tagno}')
        except Exception as exc:
            # includes the TiffFileError raised above: log and stop reading
            logger().error(
                f' corrupted tag list @{offset} '
                f'raised {exc!r:.128}'
            )
            break

        tags = {}
        # read all tag entries of this IFD at once
        data = fh.read(tagsize * tagno)
        # position of the offset to the next IFD
        pos = fh.tell()
        index = 0

        for _ in range(tagno):
            # each entry: code, data type, count, and value or value offset
            code, dtype = unpack(tagformat1, data[index : index + 4])
            count, valuebytes = unpack(
                tagformat2, data[index + 4 : index + tagsize]
            )
            index += tagsize
            name = tagnames.get(code, str(code))
            try:
                valueformat = TIFF.DATA_FORMATS[dtype]
            except KeyError:
                logger().error(f'invalid data type {dtype!r} for tag #{code}')
                continue

            valuesize = count * struct.calcsize(valueformat)
            if valuesize > offsetsize or code in customtags:
                # value does not fit into the entry, or is read by a custom
                # function: the entry holds an offset to the value
                valueoffset = unpack(offsetformat, valuebytes)[0]
                if valueoffset < 8 or valueoffset + valuesize > fh.size:
                    logger().error(
                        f'invalid value offset {valueoffset} for tag #{code}'
                    )
                    continue
                fh.seek(valueoffset)
                if code in customtags:
                    readfunc = customtags[code]
                    value = readfunc(fh, byteorder, dtype, count, offsetsize)
                elif dtype in {1, 2, 7}:
                    # BYTES, ASCII, UNDEFINED
                    value = fh.read(valuesize)
                    if len(value) != valuesize:
                        logger().warning(
                            ' '
                            f'could not read all values for tag #{code}'
                        )
                elif code in tagnames:
                    # known tags are unpacked to a tuple of Python values
                    fmt = '{}{}{}'.format(
                        byteorder, count * int(valueformat[0]), valueformat[1]
                    )
                    value = unpack(fmt, fh.read(valuesize))
                else:
                    # unknown tags are read as NumPy array
                    value = read_numpy(fh, byteorder, dtype, count, offsetsize)
            elif dtype in {1, 2, 7}:
                # BYTES, ASCII, UNDEFINED
                # value is stored inline in the entry
                value = valuebytes[:valuesize]
            else:
                # value is stored inline in the entry
                fmt = '{}{}{}'.format(
                    byteorder, count * int(valueformat[0]), valueformat[1]
                )
                value = unpack(fmt, valuebytes[:valuesize])

            # post-processing (decoding strings, unwrapping single values)
            # is skipped for custom tags, tags in TIFF.TAG_TUPLE, and
            # UNDEFINED data
            process = (
                code not in customtags
                and code not in TIFF.TAG_TUPLE
                and dtype != 7  # UNDEFINED
            )
            if process and dtype == 2:
                # TIFF ASCII fields can contain multiple strings,
                #   each terminated with a NUL
                try:
                    value = bytes2str(stripnull(value, first=False).strip())
                except UnicodeDecodeError as exc:
                    # value is kept as bytes
                    logger().warning(
                        ' coercing invalid ASCII to bytes '
                        f'for tag #{code}, due to {exc!r:.128}'
                    )
            else:
                if code in TIFF.TAG_ENUM:
                    t = TIFF.TAG_ENUM[code]
                    try:
                        value = tuple(t(v) for v in value)
                    except ValueError as exc:
                        # value is kept unconverted
                        if code not in {259, 317}:
                            # ignore compression/predictor
                            logger().warning(
                                f' tag #{code} '
                                f'raised {exc!r:.128}'
                            )
                if process and len(value) == 1:
                    value = value[0]
            tags[name] = value

        result.append(tags)
        # read offset to next page
        fh.seek(pos)
        offset = unpack(offsetformat, fh.read(offsetsize))[0]
        if offset == 0:
            # end of IFD chain
            break
        if offset >= fh.size:
            logger().error(f' invalid next page {offset=}')
            break
        fh.seek(offset)

    return result
def read_bytes(
    fh: FileHandle,
    byteorder: ByteOrder,
    dtype: int,
    count: int,
    offsetsize: int,
    /,
) -> bytes:
    """Read tag data from file.

    Parameters:
        fh: File handle to read from, positioned at the tag value.
        byteorder: Byte order of TIFF file.
        dtype: TIFF data type code of the tag.
        count: Number of values of the tag.
        offsetsize: Unused. Present for the common tag reader signature.

    Returns:
        Raw bytes of the tag value. May be shorter than requested if the
        file ends early, in which case a warning is logged.

    """
    # ASCII (2) is counted in single bytes; other types use the item size
    # of the last character of their TIFF.DATA_FORMATS entry
    itemsize = numpy.dtype(
        'B' if dtype == 2 else byteorder + TIFF.DATA_FORMATS[dtype][-1]
    ).itemsize
    size = count * itemsize
    data = fh.read(size)
    if len(data) != size:
        # the original message ended with an unbalanced ')'
        logger().warning(
            ' '
            f'failed to read {size} bytes, got {len(data)}'
        )
    return data
def read_mm_header(
    fh: FileHandle,
    byteorder: ByteOrder,
    dtype: int,
    count: int,
    offsetsize: int,
    /,
) -> dict[str, Any]:
    """Read FluoView mm_header tag value from file.

    The record described by ``TIFF.MM_HEADER`` is read and converted to a
    dictionary. The first and last items of each 'Dimensions' entry and of
    the 'GrayChannel' entry are converted with `bytes2str` and stripped of
    surrounding whitespace.

    """

    def decode(dim: Any) -> tuple[Any, ...]:
        # first and last items are byte strings; items 1-3 pass through
        return (
            bytes2str(dim[0]).strip(),
            dim[1],
            dim[2],
            dim[3],
            bytes2str(dim[4]).strip(),
        )

    record = fh.read_record(numpy.dtype(TIFF.MM_HEADER), byteorder=byteorder)
    header = recarray2dict(record)
    header['Dimensions'] = [decode(dim) for dim in header['Dimensions']]
    header['GrayChannel'] = decode(header['GrayChannel'])
    return header
""" if dtype not in {4, 5} or byteorder != '<': raise ValueError(f'invalid UIC1Tag {byteorder}{dtype}') result = {} if dtype == 5: # pre MetaMorph 2.5 (not tested) values = fh.read_array(' 1 and tagid in {28, 29, 37, 40, 41}: # silently skip unexpected tags fh.read(4) continue name, value = read_uic_tag(fh, tagid, planecount, True) result[name] = value return result def read_uic2tag( fh: FileHandle, byteorder: ByteOrder, dtype: int, count: int, offsetsize: int, /, ) -> dict[str, NDArray[Any]]: """Read MetaMorph STK UIC2Tag value from file.""" if dtype != 5 or byteorder != '<': raise ValueError('invalid UIC2Tag') values = fh.read_array(' dict[str, NDArray[Any]]: """Read MetaMorph STK UIC3Tag value from file.""" if dtype != 5 or byteorder != '<': raise ValueError('invalid UIC3Tag') values = fh.read_array(' dict[str, NDArray[Any]]: """Read MetaMorph STK UIC4Tag value from file.""" if dtype != 4 or byteorder != '<': raise ValueError('invalid UIC4Tag') result = {} while True: tagid: int = struct.unpack(' tuple[str, Any]: """Read single UIC tag value from file and return tag name and value. UIC1Tags use an offset. 
""" def read_int() -> int: return int(struct.unpack(' tuple[int, int]: value = struct.unpack('<2I', fh.read(8)) return int(value[0]), (value[1]) try: name, dtype = TIFF.UIC_TAGS[tagid] except IndexError: # unknown tag return f'_TagId{tagid}', read_int() Fraction = TIFF.UIC_TAGS[4][1] if offset: pos = fh.tell() if dtype not in {int, None}: off = read_int() if off < 8: # undocumented cases, or invalid offset if dtype is str: return name, '' if tagid == 41: # AbsoluteZValid return name, off logger().warning( ' ' f'invalid offset for tag {name!r} @{off}' ) return name, off fh.seek(off) value: Any if dtype is None: # skip name = '_' + name value = read_int() elif dtype is int: # int value = read_int() elif dtype is Fraction: # fraction value = read_int2() value = value[0] / value[1] elif dtype is julian_datetime: # datetime value = read_int2() try: value = julian_datetime(*value) except Exception as exc: value = None logger().warning( f' reading {name} raised {exc!r:.128}' ) elif dtype is read_uic_image_property: # ImagePropertyEx value = read_uic_image_property(fh) elif dtype is str: # pascal string size = read_int() if 0 <= size < 2**10: value = struct.unpack(f'{size}s', fh.read(size))[0][:-1] value = bytes2str(stripnull(value)) elif offset: value = '' logger().warning( f' invalid string in tag {name!r}' ) else: raise ValueError(f'invalid string size {size}') elif planecount == 0: value = None elif dtype == '%ip': # sequence of pascal strings value = [] for _ in range(planecount): size = read_int() if 0 <= size < 2**10: string = struct.unpack(f'{size}s', fh.read(size))[0][:-1] string = bytes2str(stripnull(string)) value.append(string) elif offset: logger().warning( f' invalid string in tag {name!r}' ) else: raise ValueError(f'invalid string size: {size}') else: # struct or numpy type dtype = '<' + dtype if '%i' in dtype: dtype = dtype % planecount if '(' in dtype: # numpy type value = fh.read_array(dtype, 1)[0] if value.shape[-1] == 2: # assume fractions value = 
value[..., 0] / value[..., 1] else: # struct format value = struct.unpack(dtype, fh.read(struct.calcsize(dtype))) if len(value) == 1: value = value[0] if offset: fh.seek(pos + 4) return name, value def read_uic_image_property(fh: FileHandle, /) -> dict[str, Any]: """Read UIC ImagePropertyEx tag value from file.""" # TODO: test this size = struct.unpack('B', fh.read(1))[0] name = struct.unpack(f'{size}s', fh.read(size))[0][:-1] flags, prop = struct.unpack(' dict[str, Any]: """Read CZ_LSMINFO tag value from file.""" if byteorder != '<': raise ValueError('invalid CZ_LSMINFO structure') magic_number, structure_size = struct.unpack(' structure_size: break lsminfo.append((name, typestr)) else: lsminfo = CZ_LSMINFO result = recarray2dict( fh.read_record(numpy.dtype(lsminfo), byteorder=byteorder) ) # read LSM info subrecords at offsets for name, reader in TIFF.CZ_LSMINFO_READERS.items(): if reader is None: continue offset = result.get('Offset' + name, 0) if offset < 8: continue fh.seek(offset) try: result[name] = reader(fh) except ValueError: pass return result def read_lsm_channeldatatypes(fh: FileHandle, /) -> NDArray[Any]: """Read LSM channel data type from file.""" size = struct.unpack(' NDArray[Any]: """Read LSM channel wavelength ranges from file.""" size = struct.unpack(' NDArray[Any]: """Read LSM positions from file.""" size = struct.unpack(' NDArray[Any]: """Read LSM time stamps from file.""" size, count = struct.unpack(' invalid LSM TimeStamps block' ) return numpy.empty((0,), ' list[tuple[float, int, str]]: """Read LSM events from file and return as list of (time, type, text).""" count = struct.unpack(' 0: esize, etime, etype = struct.unpack(' dict[str, Any]: """Read LSM ChannelColors structure from file.""" result = {'Mono': False, 'Colors': [], 'ColorNames': []} pos = fh.tell() (size, ncolors, nnames, coffset, noffset, mono) = struct.unpack( ' ' 'invalid LSM ChannelColors structure' ) return result result['Mono'] = bool(mono) # Colors fh.seek(pos + coffset) 
colors = fh.read_array('uint8', count=ncolors * 4).reshape((ncolors, 4)) result['Colors'] = colors.tolist() # ColorNames fh.seek(pos + noffset) buffer = fh.read(size - noffset) names = [] while len(buffer) > 4: size = struct.unpack(' dict[str, Any]: """Read LSM lookup tables from file.""" result: dict[str, Any] = {} ( size, nsubblocks, nchannels, luttype, advanced, currentchannel, ) = struct.unpack(' ' 'invalid LSM LookupTables structure' ) return result fh.read(9 * 4) # reserved result['LutType'] = TIFF.CZ_LSM_LUTTYPE(luttype) result['Advanced'] = advanced result['NumberChannels'] = nchannels result['CurrentChannel'] = currentchannel result['SubBlocks'] = subblocks = [] for _ in range(nsubblocks): sbtype = struct.unpack(' ' f'invalid LSM SubBlock type {sbtype}' ) break subblocks.append( {'Type': TIFF.CZ_LSM_SUBBLOCK_TYPE(sbtype), 'Data': data} ) return result def read_lsm_scaninfo(fh: FileHandle, /) -> dict[str, Any]: """Read LSM ScanInfo structure from file.""" value: Any block: dict[str, Any] = {} blocks = [block] unpack = struct.unpack if struct.unpack(' invalid LSM ScanInfo structure' ) return block fh.read(8) while True: entry, dtype, size = unpack(' dict[str, Any]: """Read OlympusSIS structure from file. No specification is available. Only few fields are known. 
def read_tvips_header(
    fh: FileHandle,
    byteorder: ByteOrder,
    dtype: int,
    count: int,
    offsetsize: int,
    /,
) -> dict[str, Any]:
    """Read TVIPS EM-MENU headers from file.

    The version 1 header (``TIFF.TVIPS_HEADER_V1``) is always read. If its
    'Version' field is 2, the version 2 header (``TIFF.TVIPS_HEADER_V2``)
    is read directly after it and merged into the result.

    Parameters:
        fh: File handle to read from, positioned at the header.
        byteorder: Byte order used to read the header records.
        dtype: Unused. Present for the common tag reader signature.
        count: Unused. Present for the common tag reader signature.
        offsetsize: Unused. Present for the common tag reader signature.

    Returns:
        Header field names mapped to values. An empty dictionary is
        returned, and a warning logged, if the version 2 magic number is
        invalid or the header version is neither 1 nor 2.

    """
    result: dict[str, Any] = {}
    header_v1 = TIFF.TVIPS_HEADER_V1
    header = fh.read_record(numpy.dtype(header_v1), byteorder=byteorder)
    for name, _ in header_v1:
        result[name] = header[name].tolist()

    # access the field by its exact name: the original used the attribute
    # `header.version`, which does not match the 'Version' field
    version = header['Version']
    if version == 2:
        header_v2 = TIFF.TVIPS_HEADER_V2
        header = fh.read_record(numpy.dtype(header_v2), byteorder=byteorder)
        if header['Magic'] != 0xAAAAAAAA:
            logger().warning(
                ' invalid TVIPS v2 magic number'
            )
            return {}
        for name, typestr in header_v2:
            if typestr.startswith('V'):
                # decode utf16 strings
                s = header[name].tobytes().decode('utf-16', errors='ignore')
                result[name] = stripnull(s, null='\x00')
            else:
                result[name] = header[name].tolist()
        # NOTE(review): the original divided the 'PhysicalPixelSizeX/Y' and
        # 'PixelSizeX/Y' fields of the temporary `header` record by 1e9
        # ("convert nm to m") after the values were copied to `result`, so
        # it never affected the returned values. The no-op is dropped and
        # the values are returned unscaled as before; confirm whether
        # callers expect meters before applying the scaling to `result`.
    elif version != 1:
        logger().warning(
            ' unknown TVIPS header version'
        )
        return {}
    return result
def read_cz_sem(
    fh: FileHandle,
    byteorder: ByteOrder,
    dtype: int,
    count: int,
    offsetsize: int,
    /,
) -> dict[str, Any]:
    """Read Zeiss SEM tag from file.

    See https://sourceforge.net/p/gwyddion/mailman/message/29275000/ for
    unnamed values.

    The tag is read as `count` bytes of text and parsed line by line.
    An all-uppercase line is taken as the key for the line that follows it.
    Lines seen while no key is pending are collected under the '' key.

    Parameters:
        fh:
            File handle positioned at the start of the tag value.
        count:
            Number of bytes to read.
        byteorder, dtype, offsetsize:
            Unused. Part of the common tag-reader signature.

    Returns:
        Dict mapping lower-cased keys to `(name, value)` or
        `(name, value, unit)` tuples. The '' key holds a tuple of the
        values that had no key.

    """
    result: dict[str, Any] = {'': ()}
    value: Any
    key = None
    data = bytes2str(stripnull(fh.read(count)))
    for line in data.splitlines():
        if line.isupper():
            # all-uppercase line: remember it as key for the next line
            key = line.lower()
        elif key:
            # line following a key: 'name = value' or 'name: value'
            try:
                name, value = line.split('=')
            except ValueError:
                # no '=', or more than one; fall back to first ':'
                try:
                    name, value = line.split(':', 1)
                except Exception:
                    # neither separator present; key stays pending
                    continue
            value = value.strip()
            unit = ''
            try:
                # try 'number unit'; unpacking fails unless exactly 2 parts
                v, u = value.split()
                number = astype(v, (int, float))
                # presumably astype returns its input if conversion fails
                if number != v:
                    value = number
                    unit = u
            except Exception:
                # single token (or more than two): try a plain number
                number = astype(value, (int, float))
                if number != value:
                    value = number
            # map switch-like strings to bool
            if value in {'No', 'Off'}:
                value = False
            elif value in {'Yes', 'On'}:
                value = True
            result[key] = (name.strip(), value)
            if unit:
                result[key] += (unit,)
            # key is consumed; next line needs a new uppercase key
            key = None
        else:
            # no key pending: collect value under the '' key
            result[''] += (astype(line, (int, float)),)
    return result
def read_micromanager_metadata(
    fh: FileHandle | BinaryIO, /, keys: Container[str] | None = None
) -> dict[str, Any]:
    """Return Micro-Manager non-TIFF settings from file.

    The settings can be used to read image data without parsing any TIFF
    structures.

    Parameters:
        fh:
            Open file handle to Micro-Manager TIFF file.
        keys:
            Name of keys to return in result.

    Returns:
        Micro-Manager non-TIFF settings, which may contain the following
        keys:

        - 'MajorVersion' (str)
        - 'MinorVersion' (str)
        - 'Summary' (dict):
          Specifies the dataset, such as shape, dimensions, and coordinates.
        - 'IndexMap' (numpy.ndarray):
          (channel, slice, frame, position, ifd_offset) indices of all
          frames.
        - 'DisplaySettings' (list[dict]):
          Image display settings such as channel contrast and colors.
        - 'Comments' (dict):
          User comments.

    Raises:
        ValueError:
            The byte order mark or the index header at offset 8 cannot be
            read. Failures in later sections are logged as warnings.

    Notes:
        Summary metadata are the same for all files in a dataset.
        DisplaySettings metadata are frequently corrupted, and Comments are
        often empty.
        The Summary and IndexMap metadata are stored at the beginning of
        the file, while DisplaySettings and Comments are towards the end.
        Excluding DisplaySettings and Comments from the results may
        significantly speed up reading metadata of interest.

    References:
        - https://micro-manager.org/Micro-Manager_File_Formats
        - https://github.com/micro-manager/NDTiffStorage

    """
    if keys is None:
        keys = {'Summary', 'IndexMap', 'DisplaySettings', 'Comments'}
    fh.seek(0)
    try:
        # first two bytes select the struct byte order prefix
        byteorder = {b'II': '<', b'MM': '>'}[fh.read(2)]
        fh.seek(8)
        (
            index_header,
            index_offset,
        ) = struct.unpack(byteorder + 'II', fh.read(8))
    except Exception as exc:
        raise ValueError('not a Micro-Manager TIFF file') from exc

    result = {}
    if index_header == 483729:
        # NDTiff >= v2
        # the second field holds the major version in this layout
        result['MajorVersion'] = index_offset
        try:
            (
                summary_header,
                summary_length,
            ) = struct.unpack(byteorder + 'II', fh.read(8))
            if summary_header != 2355492:
                # NDTiff v3
                # minor version precedes the summary header; the value
                # read as length is the header and the length follows
                result['MinorVersion'] = summary_header
                if summary_length != 2355492:
                    raise ValueError(
                        f'invalid summary_length {summary_length}'
                    )
                summary_length = struct.unpack(byteorder + 'I', fh.read(4))[0]
            if 'Summary' in keys:
                data = fh.read(summary_length)
                if len(data) != summary_length:
                    raise ValueError('not enough data')
                result['Summary'] = json.loads(stripnull(data).decode())
        except Exception as exc:
            logger().warning(
                ' '
                f'failed to read NDTiffv{index_offset} summary settings, '
                f'raised {exc!r:.128}'
            )
        return result

    # Micro-Manager multipage TIFF or NDTiff v1
    try:
        (
            display_header,
            display_offset,
            comments_header,
            comments_offset,
            summary_header,
            summary_length,
        ) = struct.unpack(byteorder + 'IIIIII', fh.read(24))
    except Exception as exc:
        # NOTE(review): execution continues with these six names unbound;
        # the sections below then fail inside their own try blocks
        logger().warning(
            ' failed to read header, '
            f'raised {exc!r:.128}'
        )

    if 'Summary' in keys:
        try:
            if summary_header != 2355492:
                raise ValueError(f'invalid summary_header {summary_header}')
            # summary JSON directly follows the 24-byte header read above
            data = fh.read(summary_length)
            if len(data) != summary_length:
                raise ValueError('not enough data')
            result['Summary'] = json.loads(stripnull(data).decode())
        except Exception as exc:
            logger().warning(
                ' '
                f'failed to read summary settings, raised {exc!r:.128}'
            )

    if 'IndexMap' in keys:
        try:
            if index_header != 54773648:
                raise ValueError(f'invalid index_header {index_header}')
            fh.seek(index_offset)
            header, count = struct.unpack(byteorder + 'II', fh.read(8))
            if header != 3453623:
                raise ValueError('invalid header')
            # each entry is five uint32 values (20 bytes)
            data = fh.read(count * 20)
            result['IndexMap'] = numpy.frombuffer(
                data, byteorder + 'u4', count * 5
            ).reshape(-1, 5)
        except Exception as exc:
            logger().warning(
                ' '
                f'failed to read index map, raised {exc!r:.128}'
            )

    if 'DisplaySettings' in keys:
        try:
            if display_header != 483765892:
                raise ValueError(f'invalid display_header {display_header}')
            fh.seek(display_offset)
            header, count = struct.unpack(byteorder + 'II', fh.read(8))
            if header != 347834724:
                # display_offset might be wrapped at 4 GB
                fh.seek(display_offset + 2**32)
                header, count = struct.unpack(byteorder + 'II', fh.read(8))
                if header != 347834724:
                    raise ValueError('invalid display header')
            data = fh.read(count)
            if len(data) != count:
                raise ValueError('not enough data')
            result['DisplaySettings'] = json.loads(stripnull(data).decode())
        except json.decoder.JSONDecodeError:
            pass  # ignore frequent truncated JSON data
        except Exception as exc:
            logger().warning(
                ' '
                f'failed to read display settings, raised {exc!r:.128}'
            )

    # default; overwritten below for NDTiff v1
    result['MajorVersion'] = 0
    try:
        if comments_header == 99384722:
            # Micro-Manager multipage TIFF
            if 'Comments' in keys:
                fh.seek(comments_offset)
                header, count = struct.unpack(byteorder + 'II', fh.read(8))
                if header != 84720485:
                    # comments_offset might be wrapped at 4 GB
                    fh.seek(comments_offset + 2**32)
                    header, count = struct.unpack(byteorder + 'II', fh.read(8))
                    if header != 84720485:
                        raise ValueError('invalid comments header')
                data = fh.read(count)
                if len(data) != count:
                    raise ValueError('not enough data')
                result['Comments'] = json.loads(stripnull(data).decode())
        elif comments_header == 483729:
            # NDTiff v1
            result['MajorVersion'] = comments_offset
        elif comments_header == 0 and comments_offset == 0:
            # no comments block present
            pass
        elif 'Comments' in keys:
            raise ValueError(f'invalid comments_header {comments_header}')
    except Exception as exc:
        logger().warning(
            ' failed to read comments, '
            f'raised {exc!r:.128}'
        )

    return result
def imagej_metadata_tag(
    metadata: dict[str, Any], byteorder: ByteOrder, /
) -> tuple[
    tuple[int, int, int, bytes, bool], tuple[int, int, int, bytes, bool]
]:
    """Return IJMetadata and IJMetadataByteCounts tags from metadata dict.

    Parameters:
        metadata:
            May contain the following keys and values:

            'Info' (str):
                Human-readable information as string.
            'Labels' (Sequence[str]):
                Human-readable label for each image.
            'Ranges' (Sequence[float]):
                Lower and upper values for each channel.
            'LUTs' (list[numpy.ndarray[(3, 256), 'uint8']]):
                Color palettes for each channel.
            'Plot' (bytes):
                Undocumented ImageJ internal format.
            'ROI', 'Overlays' (bytes):
                Undocumented ImageJ internal region of interest and overlay
                format. Can be created with the roifile package.
            'Properties' (dict[str, str]):
                Map of key, value items.

            A lower-case spelling of a key takes precedence over the
            capitalized one. Only `list` values are written as multiple
            entries; any other non-dict value is written as one entry.

        byteorder:
            Byte order of TIFF file.

    Returns:
        IJMetadata and IJMetadataByteCounts tags in
        :py:meth:`TiffWriter.write` `extratags` format, or an empty tuple
        if there is nothing to write.

    """
    if not metadata:
        return ()  # type: ignore

    magic = {'>': b'IJIJ', '<': b'JIJI'}[byteorder]

    def encode_text(item: str, order: ByteOrder, /) -> bytes:
        # UTF-16 without BOM in the byte order of the file
        return item.encode('utf-16' + {'>': 'be', '<': 'le'}[order])

    def encode_doubles(item: Sequence[float], order: ByteOrder, /) -> bytes:
        return struct.pack(f'{order}{len(item)}d', *item)

    def encode_array(item: NDArray[Any], order: ByteOrder, /) -> bytes:
        return item.tobytes()

    def passthrough(item: bytes, order: ByteOrder, /) -> bytes:
        return item

    # (metadata key, big-endian type code, encoder) in output order
    known_types: tuple[
        tuple[str, bytes, Callable[[Any, ByteOrder], bytes]], ...
    ] = (
        ('Info', b'info', encode_text),
        ('Labels', b'labl', encode_text),
        ('Ranges', b'rang', encode_doubles),
        ('LUTs', b'luts', encode_array),
        ('Plot', b'plot', passthrough),
        ('ROI', b'roi ', passthrough),
        ('Overlays', b'over', passthrough),
        ('Properties', b'prop', encode_text),
    )

    header_parts = [magic]
    payloads: list[bytes] = []
    for name, code, encode in known_types:
        lowered = name.lower()
        if lowered in metadata:
            name = lowered
        elif name not in metadata:
            continue

        entry = metadata[name]
        if isinstance(entry, dict):
            # flatten to key, value, key, value, ... strings
            items = [str(i) for pair in entry.items() for i in pair]
        elif isinstance(entry, list):
            items = entry
        else:
            items = [entry]

        if byteorder == '<':
            # type codes are stored byte-reversed in little-endian files
            code = code[::-1]
        header_parts.append(code + struct.pack(byteorder + 'I', len(items)))
        for item in items:
            payloads.append(encode(item, byteorder))

    if not payloads:
        return ()  # type: ignore

    header = b''.join(header_parts)
    data = header + b''.join(payloads)
    # first byte count is the header size, then one per payload
    sizes = [len(header)]
    sizes.extend(len(chunk) for chunk in payloads)
    bytecounts = struct.pack(byteorder + ('I' * len(sizes)), *sizes)
    return (
        (50839, 1, len(data), data, True),
        (50838, 4, len(bytecounts) // 4, bytecounts, True),
    )
def imagej_description_metadata(description: str, /) -> dict[str, Any]:
    r"""Return metadata from ImageJ image description.

    Each 'key=value' line becomes one item. Values are converted to int,
    float, or bool (from 'true'/'false', case-insensitive) if possible,
    else kept as stripped strings. Lines without '=' are ignored. Lines are
    split at the first '=' only, so values containing '=' are kept.

    Raise ValueError if not a valid ImageJ description, that is, if neither
    an 'ImageJ' nor a 'SCIFIO' key is present.

    >>> description = 'ImageJ=1.11a\nimages=510\nhyperstack=true\n'
    >>> imagej_description_metadata(description)  # doctest: +SKIP
    {'ImageJ': '1.11a', 'images': 510, 'hyperstack': True}

    """

    def _bool(val: str, /) -> bool:
        return {'true': True, 'false': False}[val.lower()]

    result: dict[str, Any] = {}
    for line in description.splitlines():
        key, sep, val = line.partition('=')
        if not sep:
            # not a key=value line
            continue
        key = key.strip()
        val = val.strip()
        for dtype in (int, float, _bool):
            try:
                val = dtype(val)  # type: ignore
                break
            except (ValueError, KeyError):
                # int/float raise ValueError, _bool raises KeyError
                pass
        result[key] = val

    if 'ImageJ' not in result and 'SCIFIO' not in result:
        raise ValueError(f'not an ImageJ image description {result!r}')
    return result
def imagej_shape(
    shape: Sequence[int],
    /,
    *,
    rgb: bool | None = None,
    axes: str | None = None,
) -> tuple[int, ...]:
    """Return shape normalized to 6D ImageJ hyperstack TZCYXS.

    Parameters:
        shape:
            Shape of image array.
        rgb:
            Last dimension contains RGB(A) samples. By default, determined
            from the length of the last dimension (3 or 4) if `shape` has
            more than two dimensions. Ignored if `axes` is specified.
        axes:
            Character codes for dimensions in `shape`.
            Must be a subsequence of 'TZCYXS' (case-insensitive).

    Raise ValueError if not a valid ImageJ hyperstack shape or axes order.

    >>> imagej_shape((2, 3, 4, 5, 3), rgb=False)
    (2, 3, 4, 5, 3, 1)

    """
    shape = tuple(int(i) for i in shape)
    ndim = len(shape)
    # The original test `1 > ndim > 6` is a chained comparison that can
    # never be true, so empty shapes and shapes with more than six
    # dimensions were never rejected here. 1-D shapes stay accepted, as
    # before.
    if ndim < 1 or ndim > 6:
        raise ValueError('ImageJ hyperstack must be 2-6 dimensional')

    if axes:
        if len(axes) != ndim:
            raise ValueError('ImageJ hyperstack shape and axes do not match')
        i = 0
        axes = axes.upper()
        for ax in axes:
            # unknown codes yield -1 and are rejected as out of order
            j = 'TZCYXS'.find(ax)
            if j < i:
                raise ValueError(
                    'ImageJ hyperstack axes must be in TZCYXS order'
                )
            i = j
        ndims = len(axes)
        newshape = []
        i = 0
        # fill in length-1 dimensions for axes not specified
        for ax in 'TZCYXS':
            if i < ndims and ax == axes[i]:
                newshape.append(shape[i])
                i += 1
            else:
                newshape.append(1)
        if newshape[-1] not in {1, 3, 4}:
            raise ValueError(
                'ImageJ hyperstack must contain 1, 3, or 4 samples'
            )
        return tuple(newshape)

    if rgb is None:
        rgb = shape[-1] in {3, 4} and ndim > 2
    if rgb and shape[-1] not in {3, 4}:
        raise ValueError('ImageJ hyperstack is not a RGB image')
    if not rgb and ndim == 6 and shape[-1] != 1:
        raise ValueError('ImageJ hyperstack is not a grayscale image')
    if rgb or shape[-1] == 1:
        # last dimension already is the samples dimension
        return (1,) * (6 - ndim) + shape
    # append a samples dimension of length 1
    return (1,) * (5 - ndim) + shape + (1,)
start of image continue if marker == 0xFFD9: # end of image break if 0xFFD0 <= marker <= 0xFFD7: # restart marker continue if marker == 0xFF01: # private marker continue length = struct.unpack('>H', jpeg[i : i + 2])[0] i += 2 if 0xFFC0 <= marker <= 0xFFC3: # start of frame return struct.unpack('>BHHB', jpeg[i : i + 6]) # type: ignore if marker == 0xFFDA: # start of scan break # skip to next marker i += length - 2 raise ValueError('no SOF marker found') def ndpi_jpeg_tile(jpeg: bytes, /) -> tuple[int, int, bytes]: """Return tile shape and JPEG header from JPEG with restart markers.""" marker: int length: int factor: int ncomponents: int restartinterval: int = 0 sofoffset: int = 0 sosoffset: int = 0 i: int = 0 while i < len(jpeg): marker = struct.unpack('>H', jpeg[i : i + 2])[0] i += 2 if marker == 0xFFD8: # start of image continue if marker == 0xFFD9: # end of image break if 0xFFD0 <= marker <= 0xFFD7: # restart marker continue if marker == 0xFF01: # private marker continue length = struct.unpack('>H', jpeg[i : i + 2])[0] i += 2 if marker == 0xFFDD: # define restart interval restartinterval = struct.unpack('>H', jpeg[i : i + 2])[0] elif marker == 0xFFC0: # start of frame sofoffset = i + 1 precision, imlength, imwidth, ncomponents = struct.unpack( '>BHHB', jpeg[i : i + 6] ) i += 6 mcuwidth = 1 mcuheight = 1 for _ in range(ncomponents): cid, factor, table = struct.unpack('>BBB', jpeg[i : i + 3]) i += 3 if factor >> 4 > mcuwidth: mcuwidth = factor >> 4 if factor & 0b00001111 > mcuheight: mcuheight = factor & 0b00001111 mcuwidth *= 8 mcuheight *= 8 i = sofoffset - 1 elif marker == 0xFFDA: # start of scan sosoffset = i + length - 2 break # skip to next marker i += length - 2 if restartinterval == 0 or sofoffset == 0 or sosoffset == 0: raise ValueError('missing required JPEG markers') # patch jpeg header for tile size tilelength = mcuheight tilewidth = restartinterval * mcuwidth jpegheader = ( jpeg[:sofoffset] + struct.pack('>HH', tilelength, tilewidth) + jpeg[sofoffset + 
def fluoview_description_metadata(
    description: str,
    /,
    ignoresections: Container[str] | None = None,
) -> dict[str, Any]:
    r"""Return metadata from FluoView image description.

    The FluoView image description format is unspecified. Expect failures.

    Parameters:
        description:
            FluoView image description. Must start with '['.
        ignoresections:
            Names of sections whose content is not parsed but returned as
            one string. By default, 'Region Info (Fields)' and
            'Protocol Description'.

    Returns:
        Dict of sections by name. Sections are dicts of 'key=value'
        entries. Entries without '=' map to None. Sections named 'LUT ...'
        are returned as uint8 arrays with three columns.

    Raises:
        ValueError: Description does not start with '['.

    >>> descr = (
    ...     '[Intensity Mapping]\nMap Ch0: Range=00000 to 02047\n'
    ...     '[Intensity Mapping End]'
    ... )
    >>> fluoview_description_metadata(descr)
    {'Intensity Mapping': {'Map Ch0: Range': '00000 to 02047'}}

    """
    if not description.startswith('['):
        raise ValueError('invalid FluoView image description')
    if ignoresections is None:
        ignoresections = {'Region Info (Fields)', 'Protocol Description'}

    section: Any
    result: dict[str, Any] = {}
    sections = [result]
    comment = False
    for line in description.splitlines():
        if not comment:
            # lines of ignored sections are kept verbatim
            line = line.strip()
        if not line:
            continue
        if line[0] == '[':
            if line[-5:] == ' End]':
                # close section
                del sections[-1]
                section = sections[-1]
                name = line[1:-5]
                if comment:
                    section[name] = '\n'.join(section[name])
                if name[:4] == 'LUT ':
                    section[name] = numpy.array(
                        section[name], dtype=numpy.uint8
                    ).reshape(-1, 3)
                continue
            # new section
            comment = False
            name = line[1:-1]
            if name[:4] == 'LUT ':
                # collects flat RGB values
                section = []
            elif name in ignoresections:
                # collects raw lines
                section = []
                comment = True
            else:
                section = {}
            sections.append(section)
            result[name] = section
            continue
        # add entry
        if comment:
            section.append(line)
            continue
        lines = line.split('=', 1)
        # The original tested `len(line)` (the string), so a key without
        # '=' that is longer than one character failed on the unpacking
        # below.
        if len(lines) == 1:
            section[lines[0].strip()] = None
            continue
        key, value = lines
        if key[:4] == 'RGB ':
            section.extend(int(rgb) for rgb in value.split())
        else:
            section[key.strip()] = astype(value.strip())
    return result
def svs_description_metadata(description: str, /) -> dict[str, Any]:
    """Return metadata from Aperio image description.

    The Aperio image description format is unspecified. Expect failures.

    The description is split at '|'. The first field is returned as
    'Header'. Remaining fields are parsed as 'key=value' items, split at
    the first '='. Fields without '=', such as the empty field left by a
    trailing '|', are skipped.

    Raise ValueError if the description does not start with 'Aperio '.

    >>> svs_description_metadata('Aperio Image Library v1.0')
    {'Header': 'Aperio Image Library v1.0'}

    """
    if not description.startswith('Aperio '):
        raise ValueError('invalid Aperio image description')
    header, *items = description.split('|')
    result: dict[str, Any] = {'Header': header}
    for item in items:
        key, sep, value = item.partition('=')
        if not sep:
            # not a key=value item; the original failed on unpacking here
            continue
        result[key.strip()] = astype(value.strip())
    return result
def scanimage_artist_metadata(artist: str, /) -> dict[str, Any] | None:
    """Return metadata from ScanImage artist tag.

    The tag value is decoded as JSON. If decoding fails with a ValueError,
    a warning is logged and None is returned.

    """
    decoded = None
    try:
        decoded = json.loads(artist)
    except ValueError as exc:
        logger().warning(
            f' raised {exc!r:.128}'
        )
    return decoded
-> dict[str, Any]: """Return OlympusSIS metadata from INI string. No specification is available. """ def keyindex(key: str, /) -> tuple[str, int]: # split key into name and index index = 0 i = len(key.rstrip('0123456789')) if i < len(key): index = int(key[i:]) - 1 key = key[:i] return key, index result: dict[str, Any] = {} bands: list[dict[str, Any]] = [] value: Any zpos: list[Any] | None = None tpos: list[Any] | None = None for line in inistr.splitlines(): line = line.strip() if line == '' or line[0] == ';': continue if line[0] == '[' and line[-1] == ']': section_name = line[1:-1] result[section_name] = section = {} if section_name == 'Dimension': result['axes'] = axes = [] result['shape'] = shape = [] elif section_name == 'ASD': result[section_name] = [] elif section_name == 'Z': if 'Dimension' in result: result[section_name]['ZPos'] = zpos = [] elif section_name == 'Time': if 'Dimension' in result: result[section_name]['TimePos'] = tpos = [] elif section_name == 'Band': nbands = result['Dimension']['Band'] bands = [{'LUT': []} for _ in range(nbands)] result[section_name] = bands iband = 0 else: key, value = line.split('=') if value.strip() == '': value = None elif ',' in value: value = tuple(astype(v) for v in value.split(',')) else: value = astype(value) if section_name == 'Dimension': section[key] = value axes.append(key) shape.append(value) elif section_name == 'ASD': if key == 'Count': result['ASD'] = [{}] * value else: key, index = keyindex(key) result['ASD'][index][key] = value elif section_name == 'Band': if key[:3] == 'LUT': lut = bands[iband]['LUT'] value = struct.pack(' 1: axes.append(sisaxes.get(x, x[0].upper())) shape.append(i) result['axes'] = ''.join(axes) result['shape'] = tuple(shape) try: result['Z']['ZPos'] = numpy.array( result['Z']['ZPos'][: result['Dimension']['Z']], 'float64' ) except Exception: pass try: result['Time']['TimePos'] = numpy.array( result['Time']['TimePos'][: result['Dimension']['Time']], 'int32' ) except Exception: pass for 
def astrotiff_description_metadata(
    description: str, /, sep: str = ':'
) -> dict[str, Any]:
    """Return metadata from AstroTIFF image description.

    Each line is parsed as a header card: the first 8 characters are the
    key, followed by '=', the value, and an optional '/ comment'.

    Parameters:
        description:
            AstroTIFF image description.
        sep:
            Separator between key and suffix for derived items:
            `key + sep + 'COMMENT'`, `key + sep + 'UNIT'`, and
            `key + sep + index` for cards without '=' (for example,
            COMMENT or HISTORY), which may occur multiple times.

    Returns:
        Dict of values by key. Values are None (undefined), str, bool,
        int, float, or tuples of numbers. Cards with invalid values are
        skipped with a warning.

    """
    logmsg = ' '
    counts: dict[str, int] = {}
    result: dict[str, Any] = {}
    value: Any
    for line in description.splitlines():
        line = line.strip()
        if not line:
            continue
        key = line[:8].strip()
        value = line[8:]
        if not value.startswith('='):
            # for example, COMMENT or HISTORY
            if key + f'{sep}0' not in result:
                result[key + f'{sep}0'] = value
                counts[key] = 1
            else:
                result[key + f'{sep}{counts[key]}'] = value
                counts[key] += 1
            continue
        value = value[1:]
        if '/' in value:
            value, comment = value.split('/', 1)
            comment = comment.strip()
        else:
            comment = ''
        value = value.strip()
        if not value:
            # undefined
            value = None
        elif value[0] == "'":
            # string
            if len(value) < 2:
                logger().warning(logmsg + f'{key}: invalid string {value!r}')
                continue
            if value[-1] == "'":
                value = value[1:-1]
            else:
                # string containing '/'
                if not ("'" in comment and '/' in comment):
                    logger().warning(
                        logmsg + f'{key}: invalid string {value!r}'
                    )
                    continue
                value, comment = line[9:].strip()[1:].split("'", 1)
                comment = comment.split('/', 1)[-1].strip()
            # TODO: string containing single quote '
        elif value[0] == '(' and value[-1] == ')':
            # complex number
            value = value[1:-1]
            dtype = float if '.' in value else int
            value = tuple(dtype(v.strip()) for v in value.split(','))
        elif value == 'T':
            value = True
        elif value == 'F':
            value = False
        else:
            # Number: try int first, then float. Values that parsed before
            # give the same result. Exponent forms without '.', such as
            # '1E5', are now accepted as float, and unparsable values
            # containing '.' are skipped with a warning instead of raising.
            try:
                value = int(value)
            except ValueError:
                try:
                    value = float(value)
                except ValueError:
                    logger().warning(
                        logmsg + f'{key}: invalid value {value!r}'
                    )
                    continue
        if key in result:
            logger().warning(logmsg + f'{key}: duplicate key')
        result[key] = value
        if comment:
            result[key + f'{sep}COMMENT'] = comment
            if comment[0] == '[' and ']' in comment:
                # leading '[unit]' in comment
                result[key + f'{sep}UNIT'] = comment[1:].split(']', 1)[0]
    return result
bitspersample: Number of bits for each sample in pixel. rescale: Upscale samples to number of bits in dtype. Returns: Flattened array of unpacked samples of native dtype. Examples: >>> data = struct.pack('BBBB', 0x21, 0x08, 0xFF, 0xFF) >>> print(unpack_rgb(data, '>> print(unpack_rgb(data, '>> print(unpack_rgb(data, '= bits) data_array = numpy.frombuffer(data, dtype.byteorder + dt) result = numpy.empty((data_array.size, len(bitspersample)), dtype.char) for i, bps in enumerate(bitspersample): t = data_array >> int(numpy.sum(bitspersample[i + 1 :])) t &= int('0b' + '1' * bps, 2) if rescale: o = ((dtype.itemsize * 8) // bps + 1) * bps if o > data_array.dtype.itemsize * 8: t = t.astype('I') t *= (2**o - 1) // (2**bps - 1) t //= 2 ** (o - (dtype.itemsize * 8)) result[:, i] = t return result.reshape(-1) def apply_colormap( image: NDArray[Any], colormap: NDArray[Any], /, contig: bool = True ) -> NDArray[Any]: """Return palette-colored image. The image array values are used to index the colormap on axis 1. The returned image array is of shape `image.shape+colormap.shape[0]` and dtype `colormap.dtype`. Parameters: image: Array of indices into colormap. colormap: RGB lookup table aka palette of shape `(3, 2**bitspersample)`. contig: Return contiguous array. Examples: >>> im = numpy.arange(256, dtype='uint8') >>> colormap = numpy.vstack([im, im, im]).astype('uint16') * 256 >>> apply_colormap(im, colormap)[-1] array([65280, 65280, 65280], dtype=uint16) """ image = numpy.take(colormap, image, axis=1) image = numpy.rollaxis(image, 0, image.ndim) if contig: image = numpy.ascontiguousarray(image) return image def parse_filenames( files: Sequence[str], /, pattern: str, axesorder: Sequence[int] | None = None, categories: dict[str, dict[str, int]] | None = None, *, _shape: Sequence[int] | None = None, ) -> tuple[ tuple[str, ...], tuple[int, ...], list[tuple[int, ...]], Sequence[str] ]: r"""Return shape and axes from sequence of file names matching pattern. 
Parameters: files: Sequence of file names to parse. pattern: Regular expression pattern matching axes names and chunk indices in file names. By default, no pattern matching is performed. Axes names can be specified by matching groups preceding the index groups in the file name, be provided as group names for the index groups, or be omitted. The predefined 'axes' pattern matches Olympus OIF and Leica TIFF series. axesorder: Indices of axes in pattern. By default, axes are returned in the order they appear in pattern. categories: Map of index group matches to integer indices. `{'axislabel': {'category': index}}` _shape: Shape of file sequence. The default is `maximum - minimum + 1` of the parsed indices for each dimension. Returns: - Axes names for each dimension. - Shape of file series. - Index of each file in shape. - Filtered sequence of file names. Examples: >>> parse_filenames( ... ['c1001.ext', 'c2002.ext'], r'([^\d])(\d)(?P\d+)\.ext' ... ) (('c', 't'), (2, 2), [(0, 0), (1, 1)], ['c1001.ext', 'c2002.ext']) """ # TODO: add option to filter files that do not match pattern shape = None if _shape is None else tuple(_shape) if pattern is None: if shape is not None and (len(shape) != 1 or shape[0] < len(files)): raise ValueError( f'shape {(len(files),)} does not fit provided shape {shape}' ) return ( ('I',), (len(files),), tuple((i,) for i in range(len(files))), files, ) pattern = TIFF.FILE_PATTERNS.get(pattern, pattern) if not pattern: raise ValueError('invalid pattern') pattern_compiled: Any if isinstance(pattern, str): pattern_compiled = re.compile(pattern) elif hasattr(pattern, 'groupindex'): pattern_compiled = pattern else: raise ValueError('invalid pattern') if categories is None: categories = {} def parse(fname, /) -> tuple[tuple[str, ...], tuple[int, ...]]: # return axes names and indices from file name assert categories is not None dims: list[str] = [] indices: list[int] = [] groupindex = {v: k for k, v in pattern_compiled.groupindex.items()} match = 
pattern_compiled.search(fname) if match is None: raise ValueError(f'pattern does not match file name {fname!r}') ax = None for i, m in enumerate(match.groups()): if m is None: continue if i + 1 in groupindex: ax = groupindex[i + 1] elif m[0].isalpha(): ax = m # axis label for next index continue if ax is None: ax = 'Q' # no preceding axis letter try: if ax in categories: m = categories[ax][m] m = int(m) except Exception as exc: raise ValueError(f'invalid index {m!r}') from exc indices.append(m) dims.append(ax) ax = None return tuple(dims), tuple(indices) normpaths = [os.path.normpath(f) for f in files] if len(normpaths) == 1: prefix_str = os.path.dirname(normpaths[0]) else: prefix_str = os.path.commonpath(normpaths) prefix = len(prefix_str) dims: tuple[str, ...] | None = None indices: list[tuple[int, ...]] = [] for fname in normpaths: lbl, idx = parse(fname[prefix:]) if dims is None: dims = lbl if axesorder is not None and ( len(axesorder) != len(dims) or any(i not in axesorder for i in range(len(dims))) ): raise ValueError( f'invalid axesorder {axesorder!r} for {dims!r}' ) elif dims != lbl: raise ValueError('dims do not match within image sequence') if axesorder is not None: idx = tuple(idx[i] for i in axesorder) indices.append(idx) assert dims is not None if axesorder is not None: dims = tuple(dims[i] for i in axesorder) # determine shape indices_array = numpy.array(indices, dtype=numpy.intp) parsedshape = numpy.max(indices, axis=0) if shape is None: startindex = numpy.min(indices_array, axis=0) indices_array -= startindex parsedshape -= startindex parsedshape += 1 shape = tuple(int(i) for i in parsedshape.tolist()) elif len(parsedshape) != len(shape) or any( i > j for i, j in zip(shape, parsedshape) ): raise ValueError( f'parsed shape {parsedshape} does not fit provided shape {shape}' ) indices = [tuple(index) for index in indices_array.tolist()] return dims, shape, indices, files def iter_images(data: NDArray[Any], /) -> Iterator[NDArray[Any]]: """Return 
iterator over pages in data array of normalized shape.""" yield from data def iter_strips( pageiter: Iterator[NDArray[Any] | None], shape: tuple[int, ...], dtype: numpy.dtype[Any], rowsperstrip: int, /, ) -> Iterator[NDArray[Any]]: """Return iterator over strips in pages.""" numstrips = (shape[-3] + rowsperstrip - 1) // rowsperstrip for iteritem in pageiter: if iteritem is None: # for _ in range(numstrips): # yield None # continue pagedata = numpy.zeros(shape, dtype) else: pagedata = iteritem.reshape(shape) for plane in pagedata: for depth in plane: for i in range(numstrips): yield depth[i * rowsperstrip : (i + 1) * rowsperstrip] def iter_tiles( data: NDArray[Any], tile: tuple[int, ...], tiles: tuple[int, ...], /, ) -> Iterator[NDArray[Any]]: """Return iterator over full tiles in data array of normalized shape. Tiles are zero-padded if necessary. """ if not 1 < len(tile) < 4 or len(tile) != len(tiles): raise ValueError('invalid tile or tiles shape') chunkshape = tile + (data.shape[-1],) chunksize = product(chunkshape) dtype = data.dtype sz, sy, sx = data.shape[2:5] if len(tile) == 2: y, x = tile for page in data: for plane in page: for ty in range(tiles[0]): ty *= y cy = min(y, sy - ty) for tx in range(tiles[1]): tx *= x cx = min(x, sx - tx) chunk = plane[0, ty : ty + cy, tx : tx + cx] if chunk.size != chunksize: chunk_ = numpy.zeros(chunkshape, dtype) chunk_[:cy, :cx] = chunk chunk = chunk_ yield chunk else: z, y, x = tile for page in data: for plane in page: for tz in range(tiles[0]): tz *= z cz = min(z, sz - tz) for ty in range(tiles[1]): ty *= y cy = min(y, sy - ty) for tx in range(tiles[2]): tx *= x cx = min(x, sx - tx) chunk = plane[ tz : tz + cz, ty : ty + cy, tx : tx + cx ] if chunk.size != chunksize: chunk_ = numpy.zeros(chunkshape, dtype) chunk_[:cz, :cy, :cx] = chunk chunk = chunk_ yield chunk[0] if z == 1 else chunk def encode_chunks( numchunks: int, chunkiter: Iterator[NDArray[Any] | None], encode: Callable[[NDArray[Any]], bytes], shape: Sequence[int], 
def zarr_selection(
    store: ZarrStore,
    selection: Any,
    /,
    *,
    groupindex: int | None = None,
    close: bool = True,
    out: OutputType = None,
) -> NDArray[Any]:
    """Return selection from Zarr store.

    Parameters:
        store:
            ZarrStore instance to read selection from.
        selection:
            Subset of image to be extracted and returned.
            Refer to the Zarr documentation for valid selections.
        groupindex:
            Index of array if store is Zarr group.
            By default, the first array of the group is used.
        close:
            Close store before returning, also if reading fails.
        out:
            Specifies how image array is returned.
            By default, create a new array.
            If a *numpy.ndarray*, a writable array to which the images
            are copied.
            If *'memmap'*, create a memory-mapped array in a temporary
            file.
            If a *string* or *open file*, the file used to create a
            memory-mapped array.

    """
    import zarr

    array = zarr.open(store, mode='r')
    try:
        if isinstance(array, zarr.hierarchy.Group):
            # select one array from the group
            array = array[0 if groupindex is None else groupindex]
        if out is not None:
            # allocate output matching the shape of the selection
            selected = zarr.indexing.BasicIndexer(selection, array)
            out = create_output(out, selected.shape, array.dtype)
        result = array.get_basic_selection(selection, out=out)
    finally:
        if close:
            store.close()
    return result
def repeat_nd(a: ArrayLike, repeats: Sequence[int], /) -> NDArray[Any]:
    """Return read-only view into input array with elements repeated.

    Zoom image array by integer factors using nearest neighbor interpolation
    (box filter).

    Every dimension of the input is expanded into a pair of dimensions,
    the original one and a zero-stride one of length `repeats`, which are
    then merged by reshaping.

    Parameters:
        a: Input array.
        repeats: Number of repetitions to apply along each dimension of input.

    Examples:
        >>> repeat_nd([[1, 2], [3, 4]], (2, 2))
        array([[1, 1, 2, 2],
               [1, 1, 2, 2],
               [3, 3, 4, 4],
               [3, 3, 4, 4]])

    """
    array = numpy.asarray(a)
    dims = list(zip(array.strides, array.shape, repeats))
    # interleave original dimensions with zero-stride repeat dimensions
    expanded_shape = [n for _, size, count in dims for n in (size, count)]
    expanded_strides = [s for stride, _, _ in dims for s in (stride, 0)]
    merged_shape = [size * count for _, size, count in dims]
    expanded = numpy.lib.stride_tricks.as_strided(
        array, expanded_shape, expanded_strides, writeable=False
    )
    return expanded.reshape(merged_shape)
>>> reshape_nd(numpy.empty(0), 1).shape (0,) >>> reshape_nd(numpy.empty(1), 2).shape (1, 1) >>> reshape_nd(numpy.empty((2, 3)), 3).shape (1, 2, 3) >>> reshape_nd(numpy.empty((3, 4, 5)), 3).shape (3, 4, 5) >>> reshape_nd((2, 3), 3) (1, 2, 3) """ if isinstance(data_or_shape, tuple): shape = data_or_shape else: shape = data_or_shape.shape if len(shape) >= ndim: return data_or_shape shape = (1,) * (ndim - len(shape)) + shape if isinstance(data_or_shape, tuple): return shape return data_or_shape.reshape(shape) def squeeze_axes( shape: Sequence[int], axes: str, /, skip: str | None = None, ) -> tuple[tuple[int, ...], str]: """Return shape and axes with length-1 dimensions removed. This implementation is deprecated and kept for compatibility with czifile 2019.7.2. :meta private: """ warnings.warn( ' is deprecated and will change signature', DeprecationWarning, stacklevel=2, ) return _squeeze_axes(shape, axes, skip)[:2] @overload def _squeeze_axes( shape: Sequence[int], axes: str, /, skip: str | None = None, ) -> tuple[tuple[int, ...], str, tuple[bool, ...]]: ... @overload def _squeeze_axes( shape: Sequence[int], axes: Sequence[str], /, skip: Sequence[str] | None = None, ) -> tuple[tuple[int, ...], Sequence[str], tuple[bool, ...]]: ... def _squeeze_axes( shape: Sequence[int], axes: str | Sequence[str], /, skip: str | Sequence[str] | None = None, ) -> tuple[tuple[int, ...], str | Sequence[str], tuple[bool, ...]]: """Return shape and axes with length-1 dimensions removed. Remove unused dimensions unless their axes are listed in `skip`. Parameters: shape: Sequence of dimension sizes. axes: Character codes for dimensions in `shape`. skip: Character codes for dimensions whose length-1 dimensions are not removed. The default is 'XY'. Returns: shape: Sequence of dimension sizes with length-1 dimensions removed. axes: Character codes for dimensions in output `shape`. squeezed: Dimensions were kept (True) or removed (False). 
Examples: >>> _squeeze_axes((5, 1, 2, 1, 1), 'TZYXC') ((5, 2, 1), 'TYX', (True, False, True, True, False)) >>> _squeeze_axes((1,), 'Q') ((1,), 'Q', (True,)) """ if len(shape) != len(axes): raise ValueError('dimensions of axes and shape do not match') if not axes: return tuple(shape), axes, () if skip is None: skip = 'X', 'Y', 'width', 'height', 'length' squeezed: list[bool] = [] shape_squeezed: list[int] = [] axes_squeezed: list[str] = [] for size, ax in zip(shape, axes): if size > 1 or ax in skip: squeezed.append(True) shape_squeezed.append(size) axes_squeezed.append(ax) else: squeezed.append(False) if len(shape_squeezed) == 0: squeezed[-1] = True shape_squeezed.append(shape[-1]) axes_squeezed.append(axes[-1]) if isinstance(axes, str): axes = ''.join(axes_squeezed) else: axes = tuple(axes_squeezed) return (tuple(shape_squeezed), axes, tuple(squeezed)) def transpose_axes( image: NDArray[Any], axes: str, /, asaxes: Sequence[str] | None = None, ) -> NDArray[Any]: """Return image array with its axes permuted to match specified axes. Parameters: image: Image array to permute. axes: Character codes for dimensions in image array. asaxes: Character codes for dimensions in output image array. The default is 'CTZYX'. Returns: Transposed image array. A length-1 dimension is added for added dimensions. A view of the input array is returned if possible. Examples: >>> transpose_axes( ... numpy.zeros((2, 3, 4, 5)), 'TYXC', asaxes='CTZYX' ... ).shape (5, 2, 1, 3, 4) """ if asaxes is None: asaxes = 'CTZYX' for ax in axes: if ax not in asaxes: raise ValueError(f'unknown axis {ax}') # add missing axes to image shape = image.shape for ax in reversed(asaxes): if ax not in axes: axes = ax + axes shape = (1,) + shape image = image.reshape(shape) # transpose axes image = image.transpose([axes.index(ax) for ax in asaxes]) return image @overload def reshape_axes( axes: str, shape: Sequence[int], newshape: Sequence[int], /, unknown: str | None = None, ) -> str: ... 
def order_axes(
    indices: ArrayLike,
    /,
    squeeze: bool = False,
) -> tuple[int, ...]:
    """Return order of axes sorted by variations in indices.

    The variation of an axis is the sum of absolute differences between
    consecutive indices along that axis.

    Parameters:
        indices:
            Multi-dimensional indices of chunks in array.
        squeeze:
            Remove length-1 dimensions of nonvarying axes.

    Returns:
        Order of axes sorted by variations in indices.
        The axis with the least variations in indices is returned first,
        the axis varying fastest is last.

    Examples:
        First axis varies fastest, second axis is squeezed:
        >>> order_axes([(0, 2, 0), (1, 2, 0), (0, 2, 1), (1, 2, 1)], True)
        (2, 0)

    """
    steps = numpy.abs(numpy.diff(indices, axis=0))
    variation = numpy.sum(steps, axis=0).tolist()
    # stable sort: axes with equal variation keep their original order
    ranked = sorted(range(len(variation)), key=lambda axis: variation[axis])
    if squeeze:
        ranked = [axis for axis in ranked if variation[axis] != 0]
    return tuple(ranked)
def stack_pages(
    pages: Sequence[TiffPage | TiffFrame | None],
    /,
    *,
    tiled: TiledSequence | None = None,
    lock: threading.RLock | NullContext | None = None,
    maxworkers: int | None = None,
    out: OutputType = None,
    **kwargs: Any,
) -> NDArray[Any]:
    """Return vertically stacked image arrays from sequence of TIFF pages.

    Parameters:
        pages:
            TIFF pages or frames to stack.
            None items are skipped; their output region is left as
            created by `create_output`.
        tiled:
            Organize pages in non-overlapping grid.
        lock:
            Reentrant lock to synchronize seeks and reads from file.
            By default, the lock of the first page's file handle is used.
        maxworkers:
            Maximum number of threads to concurrently decode pages or segments.
            By default, use up to :py:attr:`_TIFF.MAXWORKERS` threads.
        out:
            Specifies how image array is returned.
            By default, a new NumPy array is created.
            If a *numpy.ndarray*, a writable array to which the images
            are copied.
            If a string or open file, the file used to create a memory-mapped
            array.
        **kwargs:
            Additional arguments passed to :py:meth:`TiffPage.asarray`.

    Raises:
        ValueError: `pages` is empty or contains only None.

    """
    npages = len(pages)
    if npages == 0:
        raise ValueError('no pages')

    if npages == 1:
        kwargs['maxworkers'] = maxworkers
        assert pages[0] is not None
        return pages[0].asarray(out=out, **kwargs)

    # the first available keyframe defines shape and dtype of all pages
    page0 = next((p.keyframe for p in pages if p is not None), None)
    if page0 is None:
        # previously leaked a StopIteration from next()
        raise ValueError('no pages')
    if tiled is None:
        shape = (npages,) + page0.shape
    else:
        shape = tiled.shape
    dtype = page0.dtype
    assert dtype is not None
    out = create_output(out, shape, dtype)

    # distribute threads between pages (maxworkers) and the segments
    # within each page (page_maxworkers)
    # TODO: benchmark and optimize this
    if maxworkers is None or maxworkers < 1:
        # auto-detect
        page_maxworkers = page0.maxworkers
        maxworkers = min(npages, TIFF.MAXWORKERS)
        if maxworkers == 1 or page_maxworkers < 1:
            maxworkers = page_maxworkers = 1
        elif npages < 3:
            maxworkers = 1
        elif (
            page_maxworkers <= 2
            and page0.compression == 1
            and page0.fillorder == 1
            and page0.predictor == 1
        ):
            maxworkers = 1
        else:
            page_maxworkers = 1
    elif maxworkers == 1:
        maxworkers = page_maxworkers = 1
    elif npages > maxworkers or page0.maxworkers < 2:
        page_maxworkers = 1
    else:
        page_maxworkers = maxworkers
        maxworkers = 1

    kwargs['maxworkers'] = page_maxworkers

    fh = page0.parent.filehandle
    if lock is None:
        haslock = fh.has_lock
        # only enable locking on the file handle if it is not locked yet
        # and more than one thread is going to read from it
        if not haslock and (maxworkers > 1 or page_maxworkers > 1):
            fh.set_lock(True)
        lock = fh.lock
    else:
        haslock = True
    filecache = FileCache(size=max(4, maxworkers), lock=lock)

    def func(
        page: TiffPage | TiffFrame | None,
        index: int,
        out: Any = out,
        filecache: FileCache = filecache,
        kwargs: dict[str, Any] = kwargs,
        /,
    ) -> None:
        # read, decode, and copy page data
        if page is not None:
            filecache.open(page.parent.filehandle)
            page.asarray(lock=lock, out=out[index], **kwargs)
            filecache.close(page.parent.filehandle)

    # TODO: not used or tested
    def func_tiled(
        page: TiffPage | TiffFrame | None,
        index: tuple[int | slice, ...],
        out: Any = out,
        filecache: FileCache = filecache,
        kwargs: dict[str, Any] = kwargs,
        /,
    ) -> None:
        # read, decode, and copy page data
        if page is not None:
            filecache.open(page.parent.filehandle)
            out[index] = page.asarray(lock=lock, **kwargs)
            filecache.close(page.parent.filehandle)

    try:
        if tiled is None:
            if maxworkers < 2:
                for index, page in enumerate(pages):
                    func(page, index)
            else:
                page0.decode  # init TiffPage.decode function
                with ThreadPoolExecutor(maxworkers) as executor:
                    for _ in executor.map(func, pages, range(npages)):
                        pass
        elif maxworkers < 2:
            for index_tiled, page in zip(tiled.slices(), pages):
                func_tiled(page, index_tiled)
        else:
            page0.decode  # init TiffPage.decode function
            with ThreadPoolExecutor(maxworkers) as executor:
                for _ in executor.map(func_tiled, pages, tiled.slices()):
                    pass
    finally:
        # release cached file handles and the temporary lock,
        # also if reading or decoding a page failed
        filecache.clear()
        if not haslock:
            fh.set_lock(False)
    return out
""" shape = tuple(shape) if out is None: if fillvalue is None: return numpy.empty(shape, dtype) if fillvalue: out = numpy.empty(shape, dtype) out[:] = fillvalue return out return numpy.zeros(shape, dtype) if isinstance(out, numpy.ndarray): if product(shape) != product(out.shape): raise ValueError('incompatible output shape') if not numpy.can_cast(dtype, out.dtype): raise ValueError('incompatible output dtype') return out.reshape(shape) if isinstance(out, str) and out[:6] == 'memmap': import tempfile tempdir = out[7:] if len(out) > 7 else None if suffix is None: suffix = '.memmap' with tempfile.NamedTemporaryFile(dir=tempdir, suffix=suffix) as fh: out = numpy.memmap(fh, shape=shape, dtype=dtype, mode=mode) if fillvalue: out[:] = fillvalue return out out = numpy.memmap(out, shape=shape, dtype=dtype, mode=mode) if fillvalue: out[:] = fillvalue return out def matlabstr2py(matlabstr: str, /) -> Any: r"""Return Python object from Matlab string representation. Use to access ScanImage metadata. Parameters: matlabstr: String representation of Matlab objects. Returns: Matlab structures are returned as `dict`. Matlab arrays or cells are returned as `lists`. Other Matlab objects are returned as `str`, `bool`, `int`, or `float`. Examples: >>> matlabstr2py('1') 1 >>> matlabstr2py("['x y z' true false; 1 2.0 -3e4; NaN Inf @class]") [['x y z', True, False], [1, 2.0, -30000.0], [nan, inf, '@class']] >>> d = matlabstr2py( ... "SI.hChannels.channelType = {'stripe' 'stripe'}\n" ... "SI.hChannels.channelsActive = 2" ... 
) >>> d['SI.hChannels.channelType'] ['stripe', 'stripe'] """ # TODO: handle invalid input # TODO: review unboxing of multidimensional arrays def lex(s: str, /) -> list[str]: # return sequence of tokens from Matlab string representation tokens = ['['] while True: t, i = next_token(s) if t is None: break if t == ';': tokens.extend((']', '[')) elif t == '[': tokens.extend(('[', '[')) elif t == ']': tokens.extend((']', ']')) else: tokens.append(t) s = s[i:] tokens.append(']') return tokens def next_token(s: str, /) -> tuple[str | None, int]: # return next token in Matlab string length = len(s) if length == 0: return None, 0 i = 0 while i < length and s[i] == ' ': i += 1 if i == length: return None, i if s[i] in '{[;]}': return s[i], i + 1 if s[i] == "'": j = i + 1 while j < length and s[j] != "'": j += 1 return s[i : j + 1], j + 1 if s[i] == '<': j = i + 1 while j < length and s[j] != '>': j += 1 return s[i : j + 1], j + 1 j = i while j < length and s[j] not in ' {[;]}': j += 1 return s[i:j], j def value(s: str, fail: bool = False, /) -> Any: # return Python value of token s = s.strip() if not s: return s if len(s) == 1: try: return int(s) except Exception as exc: if fail: raise ValueError from exc return s if s[0] == "'": if fail and s[-1] != "'" or "'" in s[1:-1]: raise ValueError return s[1:-1] if s[0] == '<': if fail and s[-1] != '>' or '<' in s[1:-1]: raise ValueError return s if fail and any(i in s for i in " ';[]{}"): raise ValueError if s[0] == '@': return s if s in {'true', 'True'}: return True if s in {'false', 'False'}: return False if s[:6] == 'zeros(': return numpy.zeros([int(i) for i in s[6:-1].split(',')]).tolist() if s[:5] == 'ones(': return numpy.ones([int(i) for i in s[5:-1].split(',')]).tolist() if '.' 
def strptime(
    datetime_string: str, format: str | None = None, /
) -> datetime.datetime:
    """Return datetime corresponding to date string using common formats.

    Parameters:
        datetime_string:
            String representation of date and time.
        format:
            Format of `datetime_string`.
            If specified, it is tried before any of the built-in formats.
            By default, several datetime formats commonly found in TIFF files
            are parsed.

    Raises:
        ValueError: `datetime_string` does not match any format.

    Examples:
        >>> strptime('2022:08:01 22:23:24')
        datetime.datetime(2022, 8, 1, 22, 23, 24)

    """
    # built-in formats, listed in the order they are tried
    known_formats = (
        '%Y:%m:%d %H:%M:%S',  # TIFF6 specification
        '%Y%m%d %H:%M:%S.%f',  # MetaSeries
        '%Y-%m-%dT%H %M %S.%f',  # Pilatus
        '%Y-%m-%dT%H:%M:%S.%f',  # ISO
        '%Y-%m-%dT%H:%M:%S',  # ISO, microsecond is 0
        '%Y:%m:%d %H:%M:%S.%f',
        '%d/%m/%Y %H:%M:%S',
        '%d/%m/%Y %H:%M:%S.%f',
        '%m/%d/%Y %I:%M:%S %p',
        '%m/%d/%Y %I:%M:%S.%f %p',
        '%Y%m%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S%z',
        '%Y-%m-%dT%H:%M:%S.%f%z',
    )
    if format is None:
        candidates = known_formats
    else:
        # user format has highest priority and is not tried twice
        candidates = (format, *(f for f in known_formats if f != format))
    for candidate in candidates:
        try:
            return datetime.datetime.strptime(datetime_string, candidate)
        except ValueError:
            continue
    raise ValueError(
        f'time data {datetime_string!r} does not match any format'
    )


@overload
def stripnull(
    string: bytes, /, null: bytes | None = None, *, first: bool = True
) -> bytes: ...


@overload
def stripnull(
    string: str, /, null: str | None = None, *, first: bool = True
) -> str: ...


def stripnull(
    string: str | bytes,
    /,
    null: str | bytes | None = None,
    *,
    first: bool = True,
) -> str | bytes:
    r"""Return string truncated at first null character.

    Use to clean NULL terminated C strings.

    Parameters:
        string:
            String or bytes to truncate.
        null:
            Null character(s). The default is ``'\0'`` or ``b'\x00'``,
            depending on the type of `string`.
        first:
            If True (default), truncate at first occurrence of `null`.
            Else, only remove trailing items equal to the first item
            of `null`.

    >>> stripnull(b'bytes\x00\x00')
    b'bytes'
    >>> stripnull(b'bytes\x00bytes\x00\x00', first=False)
    b'bytes\x00bytes'
    >>> stripnull('string\x00')
    'string'

    """
    if null is None:
        null = b'\x00' if isinstance(string, bytes) else '\0'
    if first:
        index = string.find(null)  # type: ignore
        return string[:index] if index >= 0 else string
    # indexing bytes yields int, indexing str yields str: compare item-wise
    marker = null[0]
    end = len(string)
    while end and string[end - 1] == marker:
        end -= 1
    return string[:end]
@overload
def asbool(
    value: str,
    /,
    true: Sequence[str] | None = None,
    false: Sequence[str] | None = None,
) -> bool: ...


@overload
def asbool(
    value: bytes,
    /,
    true: Sequence[bytes] | None = None,
    false: Sequence[bytes] | None = None,
) -> bool: ...


def asbool(
    value: str | bytes,
    /,
    true: Sequence[str | bytes] | None = None,
    false: Sequence[str | bytes] | None = None,
) -> bool | bytes:
    """Return string as bool if possible, else raise TypeError.

    Parameters:
        value:
            String or bytes to interpret.
            Surrounding whitespace and character case are ignored.
        true:
            Lower case values interpreted as True.
            The default is 'true' (or b'true' for bytes).
        false:
            Lower case values interpreted as False.
            The default is 'false' (or b'false' for bytes).

    Raises:
        TypeError: `value` matches neither `true` nor `false`.

    >>> asbool(b' False ')
    False
    >>> asbool('ON', ['on'], ['off'])
    True

    """
    value = value.strip().lower()
    isbytes = isinstance(value, bytes)
    if true is None:
        true = (b'true',) if isbytes else ('true',)
    if false is None:
        false = (b'false',) if isbytes else ('false',)
    # `true` is tested before `false`
    if value in true:
        return True
    if value in false:
        return False
    raise TypeError
def rational(arg: float | tuple[int, int], /) -> tuple[int, int]:
    """Return rational numerator and denominator from float or two integers.

    Parameters:
        arg:
            Floating point number, or sequence of numerator and denominator.

    Returns:
        Numerator and denominator of fraction in lowest terms.
        If either exceeds 4294967295 (the largest 32-bit unsigned integer),
        both are scaled down and rounded, which loses precision.

    """
    from fractions import Fraction

    if isinstance(arg, collections.abc.Sequence):
        f = Fraction(arg[0], arg[1])  # type: ignore
    else:
        f = Fraction.from_float(arg)

    numerator, denominator = f.as_integer_ratio()
    if numerator > 4294967295 or denominator > 4294967295:
        # scale such that the larger value becomes 4294967295
        s = 4294967295 / max(numerator, denominator)
        numerator = round(numerator * s)
        denominator = round(denominator * s)
    return numerator, denominator


def unique_strings(strings: Iterator[str], /) -> Iterator[str]:
    """Return iterator over unique strings.

    Strings that were already yielded are made unique by appending their
    position in the input. If that name is taken too, the appended number
    is incremented until the name is unused.

    >>> list(unique_strings(iter(('a', 'b', 'a'))))
    ['a', 'b', 'a2']
    >>> list(unique_strings(iter(('a', 'a2', 'a'))))
    ['a', 'a2', 'a3']

    """
    known: set[str] = set()
    for i, string in enumerate(strings):
        if string in known:
            base = string
            suffix = i
            string = f'{base}{suffix}'
            # the suffixed name might itself have been seen before
            while string in known:
                suffix += 1
                string = f'{base}{suffix}'
        known.add(string)
        yield string


def format_size(size: int | float, /, threshold: int | float = 1536) -> str:
    """Return file size as string from byte size.

    Parameters:
        size:
            Size in bytes.
        threshold:
            Sizes below this value are formatted in the current unit.
            Else, the size is divided by 1024 and the next unit is tried.

    >>> format_size(1234)
    '1234 B'
    >>> format_size(12345678901)
    '11.50 GiB'

    """
    if size < threshold:
        return f'{size} B'
    for unit in ('KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
        size /= 1024.0
        if size < threshold:
            return f'{size:.2f} {unit}'
    # larger than any listed unit
    return 'ginormous'


def identityfunc(arg: Any, /, *args: Any, **kwargs: Any) -> Any:
    """Single argument identity function.

    Return the first argument. Additional arguments are ignored.

    >>> identityfunc('arg')
    'arg'

    """
    return arg


def nullfunc(*args: Any, **kwargs: Any) -> None:
    """Null function.

    Accept any arguments, do nothing, and return None.

    >>> nullfunc('arg', kwarg='kwarg')

    """
    return
def product(iterable: Iterable[int], /) -> int:
    """Return product of sequence of numbers.

    Equivalent of ``functools.reduce(operator.mul, iterable, 1)``.
    Multiplying NumPy integers might overflow.

    >>> product([2**8, 2**30])
    274877906944
    >>> product([])
    1

    """
    prod = 1
    for i in iterable:
        prod *= i
    return prod


def peek_iterator(iterator: Iterator[Any], /) -> tuple[Any, Iterator[Any]]:
    """Return first item of iterator and iterator.

    The first item is consumed from `iterator`. The returned iterator
    yields the first item again, followed by the remaining items.
    StopIteration propagates if `iterator` is empty.

    >>> first, it = peek_iterator(iter((0, 1, 2)))
    >>> first
    0
    >>> list(it)
    [0, 1, 2]

    """
    first = next(iterator)

    def newiter(
        first: Any = first, iterator: Iterator[Any] = iterator
    ) -> Iterator[Any]:
        yield first
        yield from iterator

    return first, newiter()


def natural_sorted(iterable: Iterable[str], /) -> list[str]:
    """Return human-sorted list of strings.

    Use to sort file names.
    Runs of decimal digits are compared by their integer value.

    >>> natural_sorted(['f1', 'f2', 'f10'])
    ['f1', 'f2', 'f10']

    """
    numbers = re.compile(r'(\d+)')

    def sortkey(x: str, /) -> list[int | str]:
        # str.isdecimal is used instead of str.isdigit because isdigit is
        # also true for characters like superscript digits, which int()
        # rejects with ValueError
        return [
            (int(c) if c.isdecimal() else c) for c in re.split(numbers, x)
        ]

    return sorted(iterable, key=sortkey)


def epics_datetime(sec: int, nsec: int, /) -> datetime.datetime:
    """Return datetime object from epicsTSSec and epicsTSNsec tag values.

    The values are seconds and nanoseconds. An offset of 631152000 seconds
    is added to obtain a POSIX timestamp, which is converted using
    ``datetime.fromtimestamp``, that is, to naive local time.

    >>> epics_datetime(802117916, 103746502)
    datetime.datetime(2015, 6, 2, 11, 31, 56, 103746)

    """
    return datetime.datetime.fromtimestamp(sec + 631152000 + nsec / 1e9)
def julian_datetime(
    julianday: int, millisecond: int = 0, /
) -> datetime.datetime:
    """Return datetime from days since 1/1/4713 BC and ms since midnight.

    Convert Julian dates according to MetaMorph.

    Parameters:
        julianday:
            Number of days since 1/1/4713 BC.
        millisecond:
            Number of milliseconds since midnight.

    Raises:
        ValueError: `julianday` is before year 1.

    >>> julian_datetime(2451576, 54362783)
    datetime.datetime(2000, 2, 2, 15, 6, 2, 783000)

    """
    if julianday <= 1721423:
        # return datetime.datetime.min  # ?
        raise ValueError(f'no datetime before year 1 ({julianday=})')

    a = julianday + 1
    if a > 2299160:
        # Gregorian calendar correction
        alpha = math.trunc((a - 1867216.25) / 36524.25)
        a += 1 + alpha - alpha // 4
    # `a` is always > 1721423 here because of the guard above
    b = a + 1524
    c = math.trunc((b - 122.1) / 365.25)
    d = math.trunc(365.25 * c)
    e = math.trunc((b - d) / 30.6001)

    day = b - d - math.trunc(30.6001 * e)
    month = e - (1 if e < 13.5 else 13)
    year = c - (4716 if month > 2.5 else 4715)

    hour, millisecond = divmod(millisecond, 1000 * 60 * 60)
    minute, millisecond = divmod(millisecond, 1000 * 60)
    second, millisecond = divmod(millisecond, 1000)

    # datetime expects microseconds as last argument, not milliseconds
    return datetime.datetime(
        year, month, day, hour, minute, second, millisecond * 1000
    )


def byteorder_isnative(byteorder: str, /) -> bool:
    """Return if byteorder matches system's byteorder.

    Parameters:
        byteorder:
            Byte order character ('=', '<', or '>') or name
            ('little' or 'big').

    >>> byteorder_isnative('=')
    True

    """
    if byteorder in {'=', sys.byteorder}:
        return True
    keys = {'big': '>', 'little': '<'}
    # normalize names to characters before comparing to native byte order
    return keys.get(byteorder, byteorder) == keys[sys.byteorder]
def recarray2dict(recarray: numpy.recarray, /) -> dict[str, Any]:
    """Return numpy.recarray as dictionary.

    Fields with zero or one dimension are converted to Python scalars or
    lists. Byte strings are truncated at the first null character and
    converted to str. Fields with more dimensions are returned unchanged.

    >>> r = numpy.array(
    ...     [(1.0, 2, 'a'), (3.0, 4, 'bc')],
    ...     dtype=[('x', '<f4'), ('y', '<i4'), ('s', 'S2')],
    ... )
    >>> recarray2dict(r)
    {'x': [1.0, 3.0], 'y': [2, 4], 's': ['a', 'bc']}
    >>> recarray2dict(r[1])
    {'x': 3.0, 'y': 4, 's': 'bc'}

    """
    # TODO: subarrays
    fields: dict[str, Any] = {}
    for name, typestr, *_ in recarray.dtype.descr:
        field: Any = recarray[name]
        if field.ndim == 0:
            field = field.tolist()
            if typestr[1] == 'S':
                field = bytes2str(stripnull(field))
        elif field.ndim == 1:
            field = field.tolist()
            if typestr[1] == 'S':
                field = [bytes2str(stripnull(item)) for item in field]
        fields[name] = field
    return fields


def xml2dict(
    xml: str,
    /,
    *,
    sanitize: bool = True,
    prefix: tuple[str, str] | None = None,
    sep: str = ',',
) -> dict[str, Any]:
    """Return XML as dictionary.

    Parameters:
        xml:
            XML data to convert.
        sanitize:
            Remove namespace prefix (``{...}``) from etree Element tags.
        prefix:
            Prefixes for dictionary keys of attributes and element text.
        sep:
            Sequence separator.

    Examples:
        >>> xml2dict(
        ...     '<?xml version="1.0" ?><root attr="name"><key>1</key></root>'
        ... )
        {'root': {'key': 1, 'attr': 'name'}}
        >>> xml2dict('<level1><level2>3.5322,-3.14</level2></level1>')
        {'level1': {'level2': (3.5322, -3.14)}}

    """
    try:
        from defusedxml import ElementTree as etree
    except ImportError:
        from xml.etree import ElementTree as etree

    if prefix:
        attr_prefix, text_prefix = prefix
    else:
        attr_prefix = text_prefix = ''

    def astype(value: Any, /) -> Any:
        # return string value as int, float, bool, tuple, or unchanged
        if not isinstance(value, str):
            return value
        if sep and sep in value:
            # sequence of numbers?
            items = [astype(item) for item in value.split(sep)]
            if any(isinstance(item, str) for item in items):
                # not all items could be converted: keep original string
                return value
            return tuple(items)
        for convert in (int, float, asbool):
            try:
                return convert(value)  # type: ignore
            except (TypeError, ValueError):
                continue
        return value

    def etree2dict(element: Any, /) -> dict[str, Any]:
        # adapted from https://stackoverflow.com/a/10077069/453463
        tag = element.tag
        if sanitize:
            tag = tag.rsplit('}', 1)[-1]
        attributes = element.attrib
        children = list(element)

        node: Any
        if children:
            # group values of child elements by tag
            grouped = collections.defaultdict(list)
            for child in children:
                for k, v in etree2dict(child).items():
                    grouped[k].append(astype(v))
            node = {
                k: astype(v[0]) if len(v) == 1 else astype(v)
                for k, v in grouped.items()
            }
        elif attributes:
            node = {}
        else:
            node = None

        if attributes:
            node.update(
                (attr_prefix + k, astype(v)) for k, v in attributes.items()
            )

        if element.text:
            text = element.text.strip()
            if children or attributes:
                # text is stored next to child elements and attributes
                if text:
                    node[text_prefix + 'value'] = astype(text)
            else:
                node = astype(text)
        return {tag: node}

    return etree2dict(etree.fromstring(xml))
""" size = len(data) if size < 1 or width < 2 or height < 1: return '' if height == 1: addr = b'' bytesperline = min( modulo * (((width - len(addr)) // 4) // modulo), size ) if bytesperline < 1: return '' nlines = 1 else: addr = b'%%0%ix: ' % len(b'%x' % size) bytesperline = min( modulo * (((width - len(addr % 1)) // 4) // modulo), size ) if bytesperline < 1: return '' width = 3 * bytesperline + len(addr % 1) nlines = (size - 1) // bytesperline + 1 if snipat is None or snipat == 1: snipat = height elif 0 < abs(snipat) < 1: snipat = int(math.floor(height * snipat)) if snipat < 0: snipat += height assert isinstance(snipat, int) blocks: list[tuple[int, bytes | None]] if height == 1 or nlines == 1: blocks = [(0, data[:bytesperline])] addr = b'' height = 1 width = 3 * bytesperline elif height is None or nlines <= height: blocks = [(0, data)] elif snipat <= 0: start = bytesperline * (nlines - height) blocks = [(start, data[start:])] # (start, None) elif snipat >= height or height < 3: end = bytesperline * height blocks = [(0, data[:end])] # (end, None) else: end1 = bytesperline * snipat end2 = bytesperline * (height - snipat - 2) if size % bytesperline: end2 += size % bytesperline else: end2 += bytesperline blocks = [ (0, data[:end1]), (size - end1 - end2, None), (size - end2, data[size - end2 :]), ] if ellipsis is None: if addr and bytesperline > 3: elps = b' ' * (len(addr % 1) + bytesperline // 2 * 3 - 2) elps += b'...' else: elps = b'...' 
def isprintable(string: str | bytes, /) -> bool:
    r"""Return if all characters in string are printable.

    Leading and trailing whitespace is ignored. Empty strings are printable.
    Bytes are decoded with the default encoding before testing. Bytes that
    cannot be decoded are not printable.

    >>> isprintable('abc')
    True
    >>> isprintable(b'\01')
    False

    """
    stripped = string.strip()
    if not stripped:
        return True
    attempts = (
        # str
        lambda: stripped.isprintable(),  # type: ignore
        # bytes have no isprintable method and need to be decoded first
        lambda: stripped.decode().isprintable(),  # type: ignore
    )
    for attempt in attempts:
        try:
            return attempt()
        except Exception:
            continue
    return False
>>> print(indent('Title:', 'Text')) Title: Text """ text = '\n'.join(str(arg) for arg in args) return '\n'.join( (' ' + line if line else line) for line in text.splitlines() if line )[2:] def pformat_xml(xml: str | bytes, /) -> str: """Return pretty formatted XML.""" try: from lxml import etree if not isinstance(xml, bytes): xml = xml.encode() tree = etree.parse(io.BytesIO(xml)) xml = etree.tostring( tree, pretty_print=True, xml_declaration=True, encoding=tree.docinfo.encoding, ) assert isinstance(xml, bytes) xml = bytes2str(xml) except Exception: if isinstance(xml, bytes): xml = bytes2str(xml) xml = xml.replace('><', '>\n<') return xml.replace(' ', ' ').replace('\t', ' ') def pformat( arg: Any, /, *, height: int | None = 24, width: int | None = 79, linewidth: int | None = 288, compact: bool = True, ) -> str: """Return pretty formatted representation of object as string. Whitespace might be altered. Long lines are cut off. """ if height is None or height < 1: height = 1024 if width is None or width < 1: width = 256 if linewidth is None or linewidth < 1: linewidth = width npopt = numpy.get_printoptions() numpy.set_printoptions(threshold=100, linewidth=width) if isinstance(arg, bytes): if arg[:5].lower() == b'': arg = bytes2str(arg) if isinstance(arg, bytes): if isprintable(arg): arg = bytes2str(arg) arg = clean_whitespace(arg) else: numpy.set_printoptions(**npopt) return hexdump(arg, width=width, height=height, modulo=1) arg = arg.rstrip() elif isinstance(arg, str): if arg[:5].lower() == '': arg = arg[: 4 * width] if height == 1 else pformat_xml(arg) # too slow # else: # import textwrap # return '\n'.join( # textwrap.wrap(arg, width=width, max_lines=height, tabsize=2) # ) arg = arg.rstrip() elif isinstance(arg, numpy.record): arg = arg.pprint() else: import pprint arg = pprint.pformat(arg, width=width, compact=compact) numpy.set_printoptions(**npopt) if height == 1: arg = arg[: width * width] arg = clean_whitespace(arg, compact=True) return arg[:linewidth] argl = 
def snipstr(
    string: str,
    /,
    width: int = 79,
    *,
    snipat: int | float | None = None,
    ellipsis: str | None = None,
) -> str:
    """Return string cut to specified length.

    Each line of `string` is cut independently. Lines are joined with
    newline characters.

    Parameters:
        string:
            String to snip.
        width:
            Maximum length of returned lines.
        snipat:
            Approximate position at which to split long lines.
            Values between 0 and 1 (exclusive) are fractions of the line
            length. Negative values count from the end of the line.
            The default is 0.5.
        ellipsis:
            Characters to insert between splits of long lines.
            The default is the horizontal ellipsis character (U+2026),
            or b'...' if `string` is bytes.

    Examples:
        >>> snipstr('abcdefghijklmnop', 8, ellipsis='...')
        'abc...op'

    """
    if snipat is None:
        snipat = 0.5
    if ellipsis is None:
        if isinstance(string, bytes):
            ellipsis = b'...'
        else:
            ellipsis = '\u2026'
    esize = len(ellipsis)

    result = []
    for line in string.splitlines():
        linelen = len(line)
        if linelen <= width:
            # line fits: keep unchanged
            result.append(line)
            continue

        # position in line around which characters are removed
        if snipat == 1:
            split = linelen
        elif 0 < abs(snipat) < 1:
            split = int(math.floor(linelen * snipat))
        else:
            split = int(snipat)
        if split < 0:
            split = max(split + linelen, 0)

        if esize == 0 or width < esize + 1:
            # no room for ellipsis: truncate only
            if split <= 0:
                # not line[-width:], which is the whole line if width is 0
                result.append(line[linelen - width :])
            else:
                result.append(line[:width])
        elif split <= 0:
            # snip at start of line
            result.append(ellipsis + line[esize - width :])
        elif split >= linelen or width < esize + 4:
            # snip at end of line
            result.append(line[: width - esize] + ellipsis)
        else:
            # snip around split position
            splitlen = linelen - width + esize
            end1 = split - splitlen // 2
            end2 = end1 + splitlen
            result.append(line[:end1] + ellipsis + line[end2:])

    if isinstance(string, bytes):
        return b'\n'.join(result)
    return '\n'.join(result)
def enumarg(enum: type[enum.IntEnum], arg: Any, /) -> enum.IntEnum:
    """Return enum member from its name or value.

    Parameters:
        enum: Type of IntEnum.
        arg: Name or value of enum member.
            Names are converted to upper case before lookup.

    Returns:
        Enum member matching name or value.

    Raises:
        ValueError: No enum member matches name or value.

    Examples:
        >>> enumarg(PHOTOMETRIC, 2)
        <PHOTOMETRIC.RGB: 2>
        >>> enumarg(PHOTOMETRIC, 'RGB')
        <PHOTOMETRIC.RGB: 2>

    """
    try:
        # lookup by value
        member = enum(arg)
    except Exception:
        try:
            # lookup by name
            member = enum[arg.upper()]
        except Exception as exc:
            raise ValueError(f'invalid argument {arg!r}') from exc
    return member


def parse_kwargs(
    kwargs: dict[str, Any], /, *keys: str, **keyvalues: Any
) -> dict[str, Any]:
    """Return dict with keys from keys|keyvals and values from kwargs|keyvals.

    Existing keys are deleted from `kwargs`.
    Items of `keys` that are not in `kwargs` are omitted from the result.
    Items of `keyvalues` that are not in `kwargs` are added with their
    default values.

    >>> kwargs = {'one': 1, 'two': 2, 'four': 4}
    >>> kwargs2 = parse_kwargs(kwargs, 'two', 'three', four=None, five=5)
    >>> kwargs == {'one': 1}
    True
    >>> kwargs2 == {'two': 2, 'four': 4, 'five': 5}
    True

    """
    parsed: dict[str, Any] = {}
    for name in keys:
        if name in kwargs:
            parsed[name] = kwargs.pop(name)
    for name, default in keyvalues.items():
        parsed[name] = kwargs.pop(name, default)
    return parsed


def update_kwargs(kwargs: dict[str, Any], /, **keyvalues: Any) -> None:
    """Update dict with keys and values if keys do not already exist.

    Existing items of `kwargs` are never overwritten, even if None.

    >>> kwargs = {'one': 1}
    >>> update_kwargs(kwargs, one=None, two=2)
    >>> kwargs == {'one': 1, 'two': 2}
    True

    """
    for name, default in keyvalues.items():
        kwargs.setdefault(name, default)
def logger() -> logging.Logger:
    """Return logging.getLogger('tifffile')."""
    # use the package name if this module is imported as tifffile.tifffile
    name = __name__.replace('tifffile.tifffile', 'tifffile')
    return logging.getLogger(name)


def validate_jhove(
    filename: str,
    /,
    jhove: str | None = None,
    ignore: Collection[str] | None = None,
) -> None:
    """Validate TIFF file with ``jhove -m TIFF-hul``.

    JHOVE does not support the BigTIFF format, more than 50 IFDs, and
    many TIFF extensions.

    Parameters:
        filename:
            Name of TIFF file to validate.
        jhove:
            Path of jhove app. The default is 'jhove'.
        ignore:
            Jhove error message to ignore.
            The default is 'More than 50 IFDs' and
            'Predictor value out of range'.

    Raises:
        ValueError:
            Jhove printed error message and did not contain one of strings
            in `ignore`.

    References:
        - `JHOVE TIFF-hul Module <http://jhove.sourceforge.net/tiff-hul.html>`_

    """
    import subprocess

    if ignore is None:
        ignore = {'More than 50 IFDs', 'Predictor value out of range'}
    if jhove is None:
        jhove = 'jhove'
    out = subprocess.check_output([jhove, filename, '-m', 'TIFF-hul'])

    marker = b'ErrorMessage: '
    if marker not in out:
        return
    # NOTE: only the first line starting with the marker is examined
    first_error = next(
        (
            line.strip()
            for line in out.splitlines()
            if line.strip().startswith(marker)
        ),
        None,
    )
    if first_error is None:
        return
    error = first_error[len(marker) :].decode()
    if not any(text in error for text in ignore):
        raise ValueError(error)


def tiffcomment(
    arg: str | os.PathLike[Any] | FileHandle | BinaryIO,
    /,
    comment: str | bytes | None = None,
    pageindex: int | None = None,
    tagcode: int | str | None = None,
) -> str | None:
    """Return or replace ImageDescription value in first page of TIFF file.

    Parameters:
        arg:
            Specifies TIFF file to open.
        comment:
            7-bit ASCII string or bytes to replace existing tag value.
            The existing value is zeroed.
        pageindex:
            Index of page which ImageDescription tag value to
            read or replace. The default is 0.
        tagcode:
            Code of tag which value to read or replace.
            The default is 270 (ImageDescription).

    Returns:
        None, if `comment` is specified. Else, the current value of the
        specified tag in the specified page.

    Raises:
        IndexError: Item at `pageindex` is not a TiffPage.
        ValueError: Page does not contain tag `tagcode`.

    """
    page_index = 0 if pageindex is None else pageindex
    code = 270 if tagcode is None else tagcode
    # file is opened for updating only if the tag value is to be replaced
    mode: Any = 'r+' if comment is not None else None
    with TiffFile(arg, mode=mode) as tif:
        page = tif.pages[page_index]
        if not isinstance(page, TiffPage):
            raise IndexError(f'TiffPage {page_index} not found')
        tag = page.tags.get(code, None)
        if tag is None:
            raise ValueError(f'no {TIFF.TAGS[code]} tag found')
        if comment is None:
            return tag.value
        tag.overwrite(comment)
    return None


def tiff2fsspec(
    filename: str | os.PathLike[Any],
    /,
    url: str,
    *,
    out: str | None = None,
    key: int | None = None,
    series: int | None = None,
    level: int | None = None,
    chunkmode: CHUNKMODE | int | str | None = None,
    fillvalue: int | float | None = None,
    zattrs: dict[str, Any] | None = None,
    squeeze: bool | None = None,
    groupname: str | None = None,
    version: int | None = None,
) -> None:
    """Write fsspec ReferenceFileSystem in JSON format for data in TIFF file.

    By default, the first series, including all levels, is exported.

    Parameters:
        filename:
            Name of TIFF file to reference.
        url:
            Remote location of TIFF file without file name(s).
        out:
            Name of output JSON file.
            The default is the `filename` with a '.json' extension.
        key, series, level, chunkmode, fillvalue, zattrs, squeeze:
            Passed to :py:meth:`TiffFile.aszarr`.
        groupname, version:
            Passed to :py:meth:`ZarrTiffStore.write_fsspec`.

    """
    jsonfile = os.fspath(filename) + '.json' if out is None else out
    aszarr_args: dict[str, Any] = dict(
        key=key,
        series=series,
        level=level,
        chunkmode=chunkmode,
        fillvalue=fillvalue,
        zattrs=zattrs,
        squeeze=squeeze,
    )
    with TiffFile(filename) as tif, tif.aszarr(**aszarr_args) as store:
        store.write_fsspec(jsonfile, url, groupname=groupname, version=version)
The position, time, and tile indices are encoded at the end of the filenames. Parameters: lsmfile: Name of LSM file to convert. binfile: Common name of output BIN files. The default is the name of the LSM file without extension. tile: Y and X dimension sizes of BIN files. The default is (256, 256). verbose: Print status of conversion. """ prints: Any = print if verbose else nullfunc if tile is None: tile = (256, 256) if binfile is None: binfile = lsmfile elif binfile.lower() == 'none': binfile = None if binfile: binfile += '_(z%ic%iy%ix%i)_m%%ip%%it%%03iy%%ix%%i.bin' prints('\nOpening LSM file... ', end='', flush=True) timer = Timer() with TiffFile(lsmfile) as lsm: if not lsm.is_lsm: prints('\n', lsm, flush=True) raise ValueError('not a LSM file') series = lsm.series[0] # first series contains the image shape = series.get_shape(False) axes = series.get_axes(False) dtype = series.dtype size = product(shape) * dtype.itemsize prints(timer) # verbose(lsm, flush=True) prints( 'Image\n axes: {}\n shape: {}\n dtype: {}\n size: {}'.format( axes, shape, dtype, format_size(size) ), flush=True, ) if not series.axes.endswith('TZCYX'): raise ValueError('not a *TZCYX LSM file') prints('Copying image from LSM to BIN files', end='', flush=True) timer.start() tiles = shape[-2] // tile[-2], shape[-1] // tile[-1] if binfile: binfile = binfile % (shape[-4], shape[-3], tile[0], tile[1]) shape = (1,) * (7 - len(shape)) + shape # cache for ZCYX stacks and output files data = numpy.empty(shape[3:], dtype=dtype) out = numpy.empty( (shape[-4], shape[-3], tile[0], tile[1]), dtype=dtype ) # iterate over Tiff pages containing data pages = iter(series.pages) for m in range(shape[0]): # mosaic axis for p in range(shape[1]): # position axis for t in range(shape[2]): # time axis for z in range(shape[3]): # z slices page = next(pages) assert page is not None data[z] = page.asarray() for y in range(tiles[0]): # tile y for x in range(tiles[1]): # tile x out[:] = data[ ..., y * tile[0] : (y + 1) * 
tile[0], x * tile[1] : (x + 1) * tile[1], ] if binfile: out.tofile(binfile % (m, p, t, y, x)) prints('.', end='', flush=True) prints(timer, flush=True) def imshow( data: NDArray[Any], /, *, photometric: PHOTOMETRIC | int | str | None = None, planarconfig: PLANARCONFIG | int | str | None = None, bitspersample: int | None = None, nodata: int | float = 0, interpolation: str | None = None, cmap: Any | None = None, vmin: int | float | None = None, vmax: int | float | None = None, figure: Any = None, subplot: Any = None, title: str | None = None, window_title: str | None = None, dpi: int = 96, maxdim: int | None = None, background: tuple[float, float, float] | str | None = None, show: bool = False, **kwargs: Any, ) -> tuple[Any, Any, Any]: """Plot n-dimensional images with `matplotlib.pyplot`. Parameters: data: Image array to display. photometric: Color space of image. planarconfig: How components of each pixel are stored. bitspersample: Number of bits per channel in integer RGB images. interpolation: Image interpolation method used in `matplotlib.imshow`. The default is 'nearest' for image dimensions > 512, else 'bilinear'. cmap: Colormap mapping non-RGBA scalar data to colors. See `matplotlib.colors.Colormap`. vmin: Minimum of data range covered by colormap. By default, the complete range of the data is covered. vmax: Maximum of data range covered by colormap. By default, the complete range of the data is covered. figure: Matplotlib figure to use for plotting. See `matplotlib.figure.Figure`. subplot: A `matplotlib.pyplot.subplot` axis. title: Subplot title. window_title: Window title. dpi: Resolution of figure. maxdim: Maximum image width and length. background: Background color. show: Display figure. **kwargs: Additional arguments passed to `matplotlib.pyplot.imshow`. Returns: Matplotlib figure, subplot, and plot axis. 
""" # TODO: rewrite detection of isrgb, iscontig # TODO: use planarconfig if photometric is None: photometric = 'RGB' if maxdim is None: maxdim = 2**16 isrgb = photometric in {'RGB', 'YCBCR'} # 'PALETTE', 'YCBCR' if data.dtype == 'float16': data = data.astype('float32') if data.dtype.kind == 'b': isrgb = False if isrgb and not ( data.shape[-1] in {3, 4} or (data.ndim > 2 and data.shape[-3] in {3, 4}) ): isrgb = False photometric = 'MINISBLACK' data = data.squeeze() if photometric in { None, 'MINISWHITE', 'MINISBLACK', 'CFA', 'MASK', 'PALETTE', 'LOGL', 'LOGLUV', 'DEPTH_MAP', 'SEMANTIC_MASK', }: data = reshape_nd(data, 2) else: data = reshape_nd(data, 3) dims = data.ndim if dims < 2: raise ValueError('not an image') if dims == 2: dims = 0 isrgb = False else: if isrgb and data.shape[-3] in {3, 4} and data.shape[-1] not in {3, 4}: data = numpy.swapaxes(data, -3, -2) data = numpy.swapaxes(data, -2, -1) elif not isrgb and ( data.shape[-1] < data.shape[-2] // 8 and data.shape[-1] < data.shape[-3] // 8 ): data = numpy.swapaxes(data, -3, -1) data = numpy.swapaxes(data, -2, -1) isrgb = isrgb and data.shape[-1] in {3, 4} dims -= 3 if isrgb else 2 if interpolation is None: threshold = 512 elif isinstance(interpolation, int): threshold = interpolation # type: ignore else: threshold = 0 if isrgb: data = data[..., :maxdim, :maxdim, :maxdim] if threshold: if data.shape[-2] > threshold or data.shape[-3] > threshold: interpolation = 'bilinear' else: interpolation = 'nearest' else: data = data[..., :maxdim, :maxdim] if threshold: if data.shape[-1] > threshold or data.shape[-2] > threshold: interpolation = 'bilinear' else: interpolation = 'nearest' if photometric == 'PALETTE' and isrgb: try: datamax = numpy.max(data) except ValueError: datamax = 1 if datamax > 255: data = data >> 8 # possible precision loss data = data.astype('B', copy=False) elif data.dtype.kind in 'ui': if not (isrgb and data.dtype.itemsize <= 1) or bitspersample is None: try: bitspersample = 
int(math.ceil(math.log(data.max(), 2))) except Exception: bitspersample = data.dtype.itemsize * 8 elif not isinstance(bitspersample, (int, numpy.integer)): # bitspersample can be tuple, such as (5, 6, 5) bitspersample = data.dtype.itemsize * 8 assert bitspersample is not None datamax = 2**bitspersample if isrgb: if bitspersample < 8: data = data << (8 - bitspersample) elif bitspersample > 8: data = data >> (bitspersample - 8) # precision loss data = data.astype('B', copy=False) elif data.dtype.kind == 'f': if nodata: data = data.copy() data[data > 1e30] = 0.0 try: datamax = numpy.max(data) except ValueError: datamax = 1 if isrgb and datamax > 1.0: if data.dtype.char == 'd': data = data.astype('f') data /= datamax else: data = data / datamax elif data.dtype.kind == 'b': datamax = 1 elif data.dtype.kind == 'c': data = numpy.absolute(data) try: datamax = numpy.max(data) except ValueError: datamax = 1 if isrgb: vmin = 0 else: if vmax is None: vmax = datamax if vmin is None: if data.dtype.kind == 'i': imin = numpy.iinfo(data.dtype).min try: vmin = numpy.min(data) except ValueError: vmin = -1 if vmin == imin: vmin = numpy.min(data[data > imin]) elif data.dtype.kind == 'f': fmin = float(numpy.finfo(data.dtype).min) try: vmin = numpy.min(data) except ValueError: vmin = 0.0 if vmin == fmin: vmin = numpy.min(data[data > fmin]) else: vmin = 0 from matplotlib import pyplot if figure is None: pyplot.rc('font', family='sans-serif', weight='normal', size=8) figure = pyplot.figure( dpi=dpi, figsize=(10.3, 6.3), frameon=True, facecolor='1.0', edgecolor='w', ) if window_title is not None: try: figure.canvas.manager.window.title(window_title) except Exception: pass size = len(title.splitlines()) if title else 1 pyplot.subplots_adjust( bottom=0.03 * (dims + 2), top=0.98 - size * 0.03, left=0.1, right=0.95, hspace=0.05, wspace=0.0, ) if subplot is None: subplot = 111 subplot = pyplot.subplot(subplot) if background is None: background = (0.382, 0.382, 0.382) 
subplot.set_facecolor(background) if title: if isinstance(title, bytes): title = title.decode('Windows-1252') pyplot.title(title, size=11) if cmap is None: if data.dtype.char == '?': cmap = 'gray' elif data.dtype.kind in 'buf' or vmin == 0: cmap = 'viridis' else: cmap = 'coolwarm' if photometric == 'MINISWHITE': cmap += '_r' image = pyplot.imshow( numpy.atleast_2d(data[(0,) * dims].squeeze()), vmin=vmin, vmax=vmax, cmap=cmap, interpolation=interpolation, **kwargs, ) if not isrgb: pyplot.colorbar() # panchor=(0.55, 0.5), fraction=0.05 def format_coord(x: float, y: float, /) -> str: # callback function to format coordinate display in toolbar x = int(x + 0.5) y = int(y + 0.5) try: if dims: return f'{curaxdat[1][y, x]} @ {current} [{y:4}, {x:4}]' return f'{data[y, x]} @ [{y:4}, {x:4}]' except IndexError: return '' def none(event: Any) -> str: return '' subplot.format_coord = format_coord image.get_cursor_data = none # type: ignore image.format_cursor_data = none # type: ignore if dims: current = list((0,) * dims) curaxdat = [0, data[tuple(current)].squeeze()] sliders = [ pyplot.Slider( ax=pyplot.axes((0.125, 0.03 * (axis + 1), 0.725, 0.025)), label=f'Dimension {axis}', valmin=0, valmax=data.shape[axis] - 1, valinit=0, valfmt=f'%.0f [{data.shape[axis]}]', ) for axis in range(dims) ] for slider in sliders: slider.drawon = False def set_image(current, sliders=sliders, data=data): # change image and redraw canvas curaxdat[1] = data[tuple(current)].squeeze() image.set_data(curaxdat[1]) for ctrl, index in zip(sliders, current): ctrl.eventson = False ctrl.set_val(index) ctrl.eventson = True figure.canvas.draw() def on_changed(index, axis, data=data, current=current): # callback function for slider change event index = int(round(index)) curaxdat[0] = axis if index == current[axis]: return if index >= data.shape[axis]: index = 0 elif index < 0: index = data.shape[axis] - 1 current[axis] = index set_image(current) def on_keypressed(event, data=data, current=current): # callback 
function for key press event key = event.key axis = curaxdat[0] if str(key) in '0123456789': on_changed(key, axis) elif key == 'right': on_changed(current[axis] + 1, axis) elif key == 'left': on_changed(current[axis] - 1, axis) elif key == 'up': curaxdat[0] = 0 if axis == len(data.shape) - 1 else axis + 1 elif key == 'down': curaxdat[0] = len(data.shape) - 1 if axis == 0 else axis - 1 elif key == 'end': on_changed(data.shape[axis] - 1, axis) elif key == 'home': on_changed(0, axis) figure.canvas.mpl_connect('key_press_event', on_keypressed) for axis, ctrl in enumerate(sliders): ctrl.on_changed(lambda k, a=axis: on_changed(k, a)) # type: ignore if show: pyplot.show() return figure, subplot, image def askopenfilename(**kwargs: Any) -> str: """Return file name(s) from Tkinter's file open dialog.""" from tkinter import Tk, filedialog root = Tk() root.withdraw() root.update() filenames = filedialog.askopenfilename(**kwargs) root.destroy() return filenames def main() -> int: """Tifffile command line usage main function.""" import optparse # TODO: use argparse logger().setLevel(logging.INFO) parser = optparse.OptionParser( usage='usage: %prog [options] path', description='Display image and metadata in TIFF file.', version=f'%prog {__version__}', prog='tifffile', ) opt = parser.add_option opt( '-p', '--page', dest='page', type='int', default=-1, help='display single page', ) opt( '-s', '--series', dest='series', type='int', default=-1, help='display select series', ) opt( '-l', '--level', dest='level', type='int', default=-1, help='display pyramid level of series', ) opt( '--nomultifile', dest='nomultifile', action='store_true', default=False, help='do not read OME series from multiple files', ) opt( '--maxplots', dest='maxplots', type='int', default=10, help='maximum number of plot windows', ) opt( '--interpol', dest='interpol', metavar='INTERPOL', default=None, help='image interpolation method', ) opt('--dpi', dest='dpi', type='int', default=96, help='plot resolution') 
opt( '--vmin', dest='vmin', type='int', default=None, help='minimum value for colormapping', ) opt( '--vmax', dest='vmax', type='int', default=None, help='maximum value for colormapping', ) opt( '--cmap', dest='cmap', type='str', default=None, help='colormap name used to map data to colors', ) opt( '--maxworkers', dest='maxworkers', type='int', default=0, help='maximum number of threads', ) opt( '--debug', dest='debug', action='store_true', default=False, help='raise exception on failures', ) opt( '--doctest', dest='doctest', action='store_true', default=False, help='run docstring examples', ) opt('-v', '--detail', dest='detail', type='int', default=2) opt('-q', '--quiet', dest='quiet', action='store_true') settings, path_list = parser.parse_args() path = ' '.join(path_list) if settings.doctest: import doctest try: import tifffile.tifffile as m except ImportError: m = None # type: ignore doctest.testmod(m, optionflags=doctest.ELLIPSIS) return 0 if not path: path = askopenfilename( title='Select a TIFF file', filetypes=TIFF.FILEOPEN_FILTER ) if not path: parser.error('No file specified') if any(i in path for i in '?*'): path_list = glob.glob(path) if not path_list: print('No files match the pattern') return 0 # TODO: handle image sequences path = path_list[0] if not settings.quiet: print('\nReading TIFF header:', end=' ', flush=True) timer = Timer() try: tif = TiffFile(path, _multifile=not settings.nomultifile) except Exception as exc: if settings.debug: raise print(f'\n\n{exc.__class__.__name__}: {exc}') return 0 if not settings.quiet: print(timer) if tif.is_ome: settings.norgb = True images: list[tuple[Any, Any, Any]] = [] if settings.maxplots > 0: if not settings.quiet: print('Reading image data:', end=' ', flush=True) def notnone(x, /): return next(i for i in x if i is not None) timer.start() try: if settings.page >= 0: images = [ ( tif.asarray( key=settings.page, maxworkers=settings.maxworkers ), tif.pages[settings.page], None, ) ] elif settings.series >= 0: 
series = tif.series[settings.series] if settings.level >= 0: level = settings.level elif series.is_pyramidal and product(series.shape) > 2**32: level = -1 for r in series.levels: level += 1 if product(r.shape) < 2**32: break else: level = 0 images = [ ( tif.asarray( series=settings.series, level=level, maxworkers=settings.maxworkers, ), notnone(tif.series[settings.series]._pages), tif.series[settings.series], ) ] else: for i, s in enumerate(tif.series[: settings.maxplots]): if settings.level < 0: level = -1 for r in s.levels: level += 1 if product(r.shape) < 2**31: break else: level = settings.level try: images.append( ( tif.asarray( series=i, level=level, maxworkers=settings.maxworkers, ), notnone(s._pages), tif.series[i], ) ) except Exception as exc: images.append((None, notnone(s.pages), None)) if settings.debug: raise print(f'\nSeries {i} raised {exc!r:.128}... ', end='') except Exception as exc: if settings.debug: raise print(f'{exc.__class__.__name__}: {exc}') if not settings.quiet: print(timer) if not settings.quiet: print('Generating report:', end=' ', flush=True) timer.start() try: width = os.get_terminal_size()[0] except Exception: width = 80 info = tif._str(detail=int(settings.detail), width=width - 1) print(timer) print() print(info) print() if images and settings.maxplots > 0: try: from matplotlib import pyplot except ImportError as exc: logger().warning(f' raised {exc!r:.128}') else: for img, page, series in images: if img is None: continue keyframe = page.keyframe vmin, vmax = settings.vmin, settings.vmax if keyframe.nodata: try: vmin = numpy.min(img[img > keyframe.nodata]) except ValueError: pass if tif.is_stk: try: vmin = tif.stk_metadata['MinScale'] # type: ignore vmax = tif.stk_metadata['MaxScale'] # type: ignore except KeyError: pass else: if vmax <= vmin: vmin, vmax = settings.vmin, settings.vmax if series: title = f'{tif}\n{page}\n{series}' window_title = f'{tif.filename} series {series.index}' else: title = f'{tif}\n{page}' window_title = 
f'{tif.filename} page {page.index}' photometric = 'MINISBLACK' if keyframe.photometric != 3: photometric = PHOTOMETRIC(keyframe.photometric).name imshow( img, title=title, window_title=window_title, vmin=vmin, vmax=vmax, cmap=settings.cmap, bitspersample=keyframe.bitspersample, nodata=keyframe.nodata, photometric=photometric, interpolation=settings.interpol, dpi=settings.dpi, show=False, ) pyplot.show() tif.close() return 0 def bytes2str( b: bytes, /, encoding: str | None = None, errors: str = 'strict' ) -> str: """Return Unicode string from encoded bytes.""" if encoding is not None: return b.decode(encoding, errors) try: return b.decode('utf-8', errors) except UnicodeDecodeError: return b.decode('cp1252', errors) def bytestr(s: str | bytes, /, encoding: str = 'cp1252') -> bytes: """Return bytes from Unicode string, else pass through.""" return s.encode(encoding) if isinstance(s, str) else s # aliases and deprecated TiffReader = TiffFile @final class lazyattr: """Attribute whose value is computed on first access. Not thread-safe. Deprecated: use `functools.cached_property`. """ __slots__ = ('func', '__dict__') def __init__(self, func): self.func = func self.__doc__ = func.__doc__ self.__module__ = func.__module__ self.__name__ = func.__name__ self.__qualname__ = func.__qualname__ # self.lock = threading.RLock() def __get__(self, instance, owner): # with self.lock: if instance is None: return self try: value = self.func(instance) except AttributeError as exc: raise RuntimeError(exc) from exc if value is NotImplemented: return getattr(super(owner, instance), self.func.__name__) setattr(instance, self.func.__name__, value) return value if __name__ == '__main__': sys.exit(main())