""" Store arrays We put arrays on disk as raw bytes, extending along the first dimension. Alongside each array x we ensure the value x.dtype which stores the string description of the array's dtype. """ from contextlib import suppress import pickle import numpy as np from toolz import valmap, identity, partial from .core import Interface from .file import File from .utils import frame, framesplit, suffix def serialize_dtype(dt): """ Serialize dtype to bytes >>> serialize_dtype(np.dtype('i4')) b'>> serialize_dtype(np.dtype('M8[us]')) b'>> parse_dtype(b'i4') dtype('int32') >>> parse_dtype(b"[('a', 'i4')]") dtype([('a', '= (0, 5, 2): unpack_kwargs = {'raw': False} else: unpack_kwargs = {'encoding': 'utf-8'} blocks = [msgpack.unpackb(f, **unpack_kwargs) for f in framesplit(bytes)] except Exception: blocks = [pickle.loads(f) for f in framesplit(bytes)] result = np.empty(sum(map(len, blocks)), dtype='O') i = 0 for block in blocks: result[i:i + len(block)] = block i += len(block) return result else: result = np.frombuffer(bytes, dtype) if copy: result = result.copy() return result compress_text = identity decompress_text = identity compress_bytes = lambda bytes, itemsize: bytes decompress_bytes = identity with suppress(ImportError): import blosc blosc.set_nthreads(1) compress_bytes = blosc.compress decompress_bytes = blosc.decompress compress_text = partial(blosc.compress, typesize=1) decompress_text = blosc.decompress with suppress(ImportError): from snappy import compress as compress_text from snappy import decompress as decompress_text def compress(bytes, dtype): if dtype == 'O': return compress_text(bytes) else: return compress_bytes(bytes, dtype.itemsize) def decompress(bytes, dtype): if dtype == 'O': return decompress_text(bytes) else: return decompress_bytes(bytes)