// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include #include #include #include "arrow/util/endian.h" #include "arrow/util/macros.h" #include "arrow/util/ubsan.h" #include "arrow/util/visibility.h" namespace arrow { namespace util { /// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t` /// /// The exact format is as follows (from LSB to MSB): /// - bits 0-10: mantissa /// - bits 10-15: exponent /// - bit 15: sign /// class ARROW_EXPORT Float16 { public: Float16() = default; explicit Float16(float f) : Float16(FromFloat(f)) {} explicit Float16(double d) : Float16(FromDouble(d)) {} template >* = NULLPTR> explicit Float16(T v) : Float16(static_cast(v)) {} /// \brief Create a `Float16` from its exact binary representation constexpr static Float16 FromBits(uint16_t bits) { return Float16{bits, bool{}}; } /// \brief Create a `Float16` from a 32-bit float (may lose precision) static Float16 FromFloat(float f); /// \brief Create a `Float16` from a 64-bit float (may lose precision) static Float16 FromDouble(double d); /// \brief Read a `Float16` from memory in native-endian byte order static Float16 FromBytes(const uint8_t* src) { return FromBits(SafeLoadAs(src)); } /// \brief Read a `Float16` from memory in little-endian byte order static Float16 FromLittleEndian(const uint8_t* src) { return FromBits(::arrow::bit_util::FromLittleEndian(SafeLoadAs(src))); } /// \brief Read a `Float16` from memory in big-endian byte order static Float16 FromBigEndian(const uint8_t* src) { return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs(src))); } /// \brief Return the value's binary representation as a `uint16_t` constexpr uint16_t bits() const { return bits_; } /// \brief Return true if the value is negative (sign bit is set) constexpr bool signbit() const { return (bits_ & 0x8000) != 0; } /// \brief Return true if the value is NaN constexpr bool is_nan() const { return (bits_ & 0x7fff) > 0x7c00; } /// \brief Return true if the value is positive/negative infinity constexpr bool is_infinity() const { return (bits_ & 0x7fff) == 0x7c00; } /// \brief Return true if the value is finite and not NaN constexpr bool is_finite() const { return (bits_ & 0x7c00) != 0x7c00; } /// \brief Return true if the value is positive/negative zero constexpr bool is_zero() const { return (bits_ & 0x7fff) == 0; } /// \brief Convert to a 32-bit float float ToFloat() const; /// \brief Convert to a 64-bit float double ToDouble() const; explicit operator float() const { return ToFloat(); } explicit operator double() const { return ToDouble(); } /// \brief Copy the value's bytes in native-endian byte order void ToBytes(uint8_t* dest) const { std::memcpy(dest, &bits_, sizeof(bits_)); } /// \brief Return the value's bytes in native-endian byte order constexpr std::array ToBytes() const { #if ARROW_LITTLE_ENDIAN return ToLittleEndian(); #else return ToBigEndian(); #endif } /// \brief Copy the value's bytes in little-endian byte order void ToLittleEndian(uint8_t* dest) const { const auto bytes = ToLittleEndian(); std::memcpy(dest, bytes.data(), bytes.size()); } /// \brief Return the value's bytes in little-endian byte order constexpr std::array ToLittleEndian() const { #if ARROW_LITTLE_ENDIAN return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)}; #else return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)}; #endif } /// \brief Copy the value's bytes in big-endian byte order void ToBigEndian(uint8_t* dest) const { const auto bytes = ToBigEndian(); std::memcpy(dest, bytes.data(), bytes.size()); } /// \brief Return the value's bytes in big-endian byte order constexpr std::array ToBigEndian() const { #if ARROW_LITTLE_ENDIAN return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)}; #else return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)}; #endif } constexpr Float16 operator-() const { return FromBits(bits_ ^ 0x8000); } constexpr Float16 operator+() const { return FromBits(bits_); } friend constexpr bool operator==(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; return Float16::CompareEq(lhs, rhs); } friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); } friend constexpr bool operator<(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; return Float16::CompareLt(lhs, rhs); } friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; } friend constexpr bool operator<=(Float16 lhs, Float16 rhs) { if (lhs.is_nan() || rhs.is_nan()) return false; return !Float16::CompareLt(rhs, lhs); } friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; } ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg); protected: uint16_t bits_; private: constexpr Float16(uint16_t bits, bool) : bits_(bits) {} // Comparison helpers that assume neither operand is NaN static constexpr bool CompareEq(Float16 lhs, Float16 rhs) { return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero()); } static constexpr bool CompareLt(Float16 lhs, Float16 rhs) { if (lhs.signbit()) { if (rhs.signbit()) { // Both are negative return lhs.bits() > rhs.bits(); } else { // Handle +/-0 return !lhs.is_zero() || rhs.bits() != 0; } } else if (rhs.signbit()) { return false; } else { // Both are positive return lhs.bits() < rhs.bits(); } } }; static_assert(std::is_trivial_v); } // namespace util } // namespace arrow // TODO: Not complete template <> class std::numeric_limits { using T = arrow::util::Float16; public: static constexpr bool is_specialized = true; static constexpr bool is_signed = true; static constexpr bool has_infinity = true; static constexpr bool has_quiet_NaN = true; static constexpr T min() { return T::FromBits(0b0000010000000000); } static constexpr T max() { return T::FromBits(0b0111101111111111); } static constexpr T lowest() { return -max(); } static constexpr T infinity() { return T::FromBits(0b0111110000000000); } static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); } };