// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include #include "arrow/array/array_base.h" #include "arrow/compute/type_fwd.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/visibility.h" #include "arrow/type_fwd.h" namespace arrow { class ARROW_TESTING_EXPORT ConstantArrayGenerator { public: /// \brief Generates a constant BooleanArray /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Boolean(int64_t size, bool value = false); /// \brief Generates a constant UInt8Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr UInt8(int64_t size, uint8_t value = 0); /// \brief Generates a constant Int8Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Int8(int64_t size, int8_t value = 0); /// \brief Generates a constant UInt16Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr UInt16(int64_t size, uint16_t value = 0); /// \brief Generates a constant UInt16Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Int16(int64_t size, int16_t value = 0); /// \brief Generates a constant UInt32Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr UInt32(int64_t size, uint32_t value = 0); /// \brief Generates a constant UInt32Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Int32(int64_t size, int32_t value = 0); /// \brief Generates a constant UInt64Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr UInt64(int64_t size, uint64_t value = 0); /// \brief Generates a constant UInt64Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Int64(int64_t size, int64_t value = 0); /// \brief Generates a constant Float32Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Float32(int64_t size, float value = 0); /// \brief Generates a constant Float64Array /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr Float64(int64_t size, double value = 0); /// \brief Generates a constant StringArray /// /// \param[in] size the size of the array to generate /// \param[in] value to repeat /// /// \return a generated Array static std::shared_ptr String(int64_t size, std::string value = ""); template static std::shared_ptr Numeric(int64_t size, CType value = 0) { switch (ArrowType::type_id) { case Type::BOOL: return Boolean(size, static_cast(value)); case Type::UINT8: return UInt8(size, static_cast(value)); case Type::INT8: return Int8(size, static_cast(value)); case Type::UINT16: return UInt16(size, static_cast(value)); case Type::INT16: return Int16(size, static_cast(value)); case Type::UINT32: return UInt32(size, static_cast(value)); case Type::INT32: return Int32(size, static_cast(value)); case Type::UINT64: return UInt64(size, static_cast(value)); case Type::INT64: return Int64(size, static_cast(value)); case Type::FLOAT: return Float32(size, static_cast(value)); case Type::DOUBLE: return Float64(size, static_cast(value)); case Type::INTERVAL_DAY_TIME: case Type::DATE32: { EXPECT_OK_AND_ASSIGN(auto viewed, Int32(size, static_cast(value))->View(date32())); return viewed; } case Type::INTERVAL_MONTHS: { EXPECT_OK_AND_ASSIGN(auto viewed, Int32(size, static_cast(value)) ->View(std::make_shared())); return viewed; } case Type::TIME32: { EXPECT_OK_AND_ASSIGN(auto viewed, Int32(size, static_cast(value)) ->View(std::make_shared(TimeUnit::SECOND))); return viewed; } case Type::TIME64: { EXPECT_OK_AND_ASSIGN(auto viewed, Int64(size, static_cast(value)) ->View(std::make_shared())); return viewed; } case Type::DATE64: { EXPECT_OK_AND_ASSIGN(auto viewed, Int64(size, static_cast(value))->View(date64())); return viewed; } case Type::TIMESTAMP: { EXPECT_OK_AND_ASSIGN( auto viewed, Int64(size, static_cast(value)) ->View(std::make_shared(TimeUnit::SECOND))); return viewed; } default: return nullptr; } } /// \brief Generates a constant Array of zeroes /// /// \param[in] size the size of the array to generate /// \param[in] type the type of the Array /// /// \return a generated Array static std::shared_ptr Zeroes(int64_t size, const std::shared_ptr& type); /// \brief Generates a RecordBatch of zeroes /// /// \param[in] size the size of the array to generate /// \param[in] schema to conform to /// /// This function is handy to return of RecordBatch of a desired shape. /// /// \return a generated RecordBatch static std::shared_ptr Zeroes(int64_t size, const std::shared_ptr& schema); /// \brief Generates a RecordBatchReader by repeating a RecordBatch /// /// \param[in] n_batch the number of times it repeats batch /// \param[in] batch the RecordBatch to repeat /// /// \return a generated RecordBatchReader static std::shared_ptr Repeat( int64_t n_batch, const std::shared_ptr batch); /// \brief Generates a RecordBatchReader of zeroes batches /// /// \param[in] n_batch the number of RecordBatch /// \param[in] batch_size the size of each RecordBatch /// \param[in] schema to conform to /// /// \return a generated RecordBatchReader static std::shared_ptr Zeroes(int64_t n_batch, int64_t batch_size, const std::shared_ptr& schema); }; ARROW_TESTING_EXPORT Result> ScalarVectorToArray(const ScalarVector& scalars); namespace gen { class ARROW_TESTING_EXPORT ArrayGenerator { public: virtual ~ArrayGenerator() = default; virtual Result> Generate(int64_t num_rows) = 0; virtual std::shared_ptr type() const = 0; }; // Same as DataGenerator below but instead of returning Result an ok status is EXPECT'd class ARROW_TESTING_EXPORT GTestDataGenerator { public: virtual ~GTestDataGenerator() = default; virtual std::shared_ptr<::arrow::RecordBatch> RecordBatch(int64_t num_rows) = 0; virtual std::vector> RecordBatches( int64_t rows_per_batch, int num_batches) = 0; virtual ::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) = 0; virtual std::vector<::arrow::compute::ExecBatch> ExecBatches(int64_t rows_per_batch, int num_batches) = 0; virtual std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk, int num_chunks = 1) = 0; virtual std::shared_ptr<::arrow::Schema> Schema() = 0; }; class ARROW_TESTING_EXPORT DataGenerator { public: virtual ~DataGenerator() = default; virtual Result> RecordBatch(int64_t num_rows) = 0; virtual Result>> RecordBatches( int64_t rows_per_batch, int num_batches) = 0; virtual Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) = 0; virtual Result> ExecBatches( int64_t rows_per_batch, int num_batches) = 0; virtual Result> Table(int64_t rows_per_chunk, int num_chunks = 1) = 0; virtual std::shared_ptr<::arrow::Schema> Schema() = 0; /// @brief Converts this generator to a variant that fails (in a googletest sense) /// if any error is encountered. virtual std::unique_ptr FailOnError() = 0; }; /// @brief A potentially named field /// /// If name is not specified then a name will be generated automatically (e.g. f0, f1) struct ARROW_TESTING_EXPORT GeneratorField { public: GeneratorField(std::shared_ptr gen) // NOLINT implicit conversion : name(), gen(std::move(gen)) {} GeneratorField(std::string name, std::shared_ptr gen) : name(std::move(name)), gen(std::move(gen)) {} std::optional name; std::shared_ptr gen; }; /// Create a table generator with the given fields ARROW_TESTING_EXPORT std::shared_ptr Gen( std::vector column_gens); /// make a generator that returns a constant value ARROW_TESTING_EXPORT std::shared_ptr Constant( std::shared_ptr value); /// make a generator that returns an incrementing value /// /// Note: overflow is not prevented standard unsigned integer overflow applies ARROW_TESTING_EXPORT std::shared_ptr Step(uint32_t start = 0, uint32_t step = 1, bool signed_int = false); /// make a generator that returns a random value ARROW_TESTING_EXPORT std::shared_ptr Random( std::shared_ptr type); /// TODO(if-needed) could add a repeat-scalars generator, e.g. Repeat({1, 2, 3}) for /// 1,2,3,1,2,3,1 /// /// TODO(if-needed) could add a repeat-from-json generator e.g. Repeat(int32(), "[1, 2, /// 3]")), same behavior as repeat-scalars } // namespace gen } // namespace arrow