jli  Linuxx86_641.10.3v1.10.30b4590a5507d3f3046e5bafc007cacbbfc9b310bܤ4TablesZ6Pj Ʉ7/./opt/julia/packages/Tables/NSGZI/src/Tables.jlEȀSm7LinearAlgebra$d kT(pDataValueInterfacesF]m/DataAPI)_Vk[J7TableTraits,26.PyGIteratorInterfaceExtensions#]i^r^XźOrderedCollections3/opt/julia/packages/Tables/NSGZI/src/namedtuples.jlEA-/opt/julia/packages/Tables/NSGZI/src/utils.jlA1/opt/julia/packages/Tables/NSGZI/src/fallbacks.jlEA8/opt/julia/packages/Tables/NSGZI/src/tofromdatavalues.jlA./opt/julia/packages/Tables/NSGZI/src/matrix.jlEA-/opt/julia/packages/Tables/NSGZI/src/dicts.jlEA4 CoremуJ5Basemу]J5MainmуJ5ArgToolsBń x(mуF K5 Artifactsmr-V3|mу K5Base64UlD*_mу> K5CRC32c\y.jmуj K5 FileWatchingXzsy`{,zmуh& K5LibdluVW59˗,mу-" K5LoggingT{VhUXM=mуrU" K5MmapP~:xg,Omу|' K5NetworkOptionsC0YW,mуʠ, K5SHAQ<$!<%mу1 K5 Serialization [)*k1mу-G K5Sockets1V$ bdސݗmуYBY K5UnicodeP>I>Nrmуeszo K5 LinearAlgebraSm7̏mуuux K5 OpenBLAS_jll[(Śb6EcQ FmуDux K5libblastrampoline_jllLSۆ }lxӠmу^} K5MarkdownZPn7z`smу/Ed~ K5Printfg^cX׸QDmу;h K5Random_ɢ?\Ymу? K5TarOi>աmу!t, K5DatesEY8pj2 mуX K5FuturebS;3{I xVMmуsD K5InteractiveUtilsWL ~@'ZmуVg K5LibGit2Z[&RPTv3EКRmу8J K5 LibGit2_jll YXg}]$mуD K5 MbedTLS_jllAX 3ȡ_mу- K5 LibSSH2_jlloTZk)߆ Indexable collection with known length Tables.getcolumn(::AbstractColumns, i::Int) => Indexable collection with known length Tables.getcolumn(::AbstractColumns, T, i::Int, nm::Symbol) => Indexable collection with known length Tables.getcolumn(::AbstractRow, nm::Symbol) => Column value Tables.getcolumn(::AbstractRow, i::Int) => Column value Tables.getcolumn(::AbstractRow, T, i::Int, nm::Symbol) => Column value Retrieve an entire column (from `AbstractColumns`) or single row column value (from an `AbstractRow`) by column name (`nm`), index (`i`), or if desired, by column element type (`T`), index (`i`), and name (`nm`). When called on a `AbstractColumns` interface object, the returned object should be a 1-based indexable collection with known length. 
When called on a `AbstractRow` interface object, it returns the single column value. The methods taking a single `Symbol` or `Int` are both required for the `AbstractColumns` and `AbstractRow` interfaces; the third method is optional if type stability is possible. The default definition of `Tables.getcolumn(x, i::Int)` is `getfield(x, i)`. The default definition of `Tables.getcolumn(x, nm::Symbol)` is `getproperty(x, nm)`. """ function getcolumn end getcolumn(x, i::Int) = getfield(x, i) getcolumn(x, nm::Symbol) = getproperty(x, nm) getcolumn(x, ::Type{T}, i::Int, nm::Symbol) where {T} = getcolumn(x, nm) getcolumn(x::NamedTuple{names, types}, ::Type{T}, i::Int, nm::Symbol) where {names, types, T} = Core.getfield(x, i) """ Tables.columnnames(::Union{AbstractColumns, AbstractRow}) => Indexable collection Retrieves the list of column names as a 1-based indexable collection (like a `Tuple` or `Vector`) for a `AbstractColumns` or `AbstractRow` interface object. The default definition calls `propertynames(x)`. The returned column names must be unique. """ function columnnames end columnnames(x) = propertynames(x) """ Tables.isrowtable(x) => Bool For convenience, some table objects that are naturally "row oriented" can define `Tables.isrowtable(::Type{TableType}) = true` to simplify satisfying the Tables.jl interface. Requirements for defining `isrowtable` include: * `Tables.rows(x) === x`, i.e. the table object itself is a `Row` iterator * If the table object is mutable, it should support: * `push!(x, row)`: allow pushing a single row onto table * `append!(x, rows)`: allow appending set of rows onto table * If table object is mutable and indexable, it should support: * `x[i] = row`: allow replacing of a row with another row by index A table object that defines `Tables.isrowtable` will have definitions for `Tables.istable`, `Tables.rowaccess`, and `Tables.rows` automatically defined. 
""" function isrowtable end isrowtable(::T) where {T} = isrowtable(T) isrowtable(::Type{T}) where {T} = false # to avoid ambiguities isrowtable(::Type{T}) where {T <: AbstractVector{Union{}}} = false # default definitions for AbstractDict to act as an AbstractColumns or AbstractRow getcolumn(x::AbstractDict{Symbol}, i::Int) = x[columnnames(x)[i]] getcolumn(x::AbstractDict{Symbol}, nm::Symbol) = x[nm] getcolumn(x::AbstractDict{Symbol}, ::Type{T}, i::Int, nm::Symbol) where {T} = x[nm] columnnames(x::AbstractDict{Symbol}) = collect(keys(x)) getcolumn(x::AbstractDict{<:AbstractString}, i::Int) = x[String(columnnames(x)[i])] getcolumn(x::AbstractDict{<:AbstractString}, nm::Symbol) = x[String(nm)] getcolumn(x::AbstractDict{<:AbstractString}, ::Type{T}, i::Int, nm::Symbol) where {T} = x[String(nm)] columnnames(x::AbstractDict{<:AbstractString}) = collect(Symbol(k) for k in keys(x)) # AbstractVector of Dicts for Tables.rows const DictRows = AbstractVector{T} where {T <: Union{AbstractDict{<:AbstractString}, AbstractDict{Symbol}}} isrowtable(::Type{<:DictRows}) = true # DictRows doesn't naturally lend itself to the `Tables.schema` requirement # we can't just look at the first row, because the types might change, # row-to-row (e.g. `missing`, then `1.1`, etc.). 
Therefore, the safest option # is to just return `nothing` schema(x::DictRows) = nothing # Dict of AbstractVectors for Tables.columns const DictColumns = Union{<:AbstractDict{<:AbstractString, <:AbstractVector}, <:AbstractDict{Symbol, <:AbstractVector}} istable(::Type{<:DictColumns}) = true columnaccess(::Type{<:DictColumns}) = true columns(x::DictColumns) = x schema(x::DictColumns) = Schema(collect(keys(x)), eltype.(values(x))) # for other AbstractDict, let's throw an informative error columns(x::T) where {T <: AbstractDict} = error("to treat $T as a table, it must have a key type of `Symbol`, and a value type `<: AbstractVector`") # default definitions for AbstractRow, AbstractColumns const RorC = Union{AbstractRow, AbstractColumns} # avoids mutual recursion with default definitions (issue #221) getcolumn(::T, ::Int) where {T <: RorC} = error("`Tables.getcolumn` must be specifically overloaded for $T <: Union{AbstractRow, AbstractColumns}`") getcolumn(::T, ::Symbol) where {T <: RorC} = error("`Tables.getcolumn` must be specifically overloaded for $T <: Union{AbstractRow, AbstractColumns}`") columnnames(::T) where {T <: RorC} = error("`Tables.columnnames` must be specifically overloaded for $T <: Union{AbstractRow, AbstractColumns}`") Base.IteratorSize(::Type{R}) where {R <: RorC} = Base.HasLength() Base.length(r::RorC) = length(columnnames(r)) Base.IndexStyle(::Type{<:RorC}) = Base.IndexLinear() Base.firstindex(r::RorC) = 1 Base.lastindex(r::RorC) = length(r) Base.getindex(r::RorC, i::Int) = getcolumn(r, i) Base.getindex(r::RorC, nm::Symbol) = getcolumn(r, nm) Base.getproperty(r::RorC, nm::Symbol) = getcolumn(r, nm) Base.getproperty(r::RorC, i::Int) = getcolumn(r, i) Base.propertynames(r::RorC) = columnnames(r) Base.keys(r::RorC) = columnnames(r) Base.values(r::RorC) = collect(r) Base.haskey(r::RorC, key::Symbol) = key in columnnames(r) Base.haskey(r::RorC, i::Int) = 0 < i <= length(columnnames(r)) Base.get(r::RorC, key::Union{Integer, Symbol}, default) = 
haskey(r, key) ? getcolumn(r, key) : default Base.get(f::Base.Callable, r::RorC, key::Union{Integer, Symbol}) = haskey(r, key) ? getcolumn(r, key) : f() Base.iterate(r::RorC, i=1) = i > length(r) ? nothing : (getcolumn(r, i), i + 1) Base.isempty(r::RorC) = length(r) == 0 function Base.NamedTuple(r::RorC) names = columnnames(r) return NamedTuple{Tuple(map(Symbol, names))}(Tuple(getcolumn(r, nm) for nm in names)) end function Base.show(io::IO, x::T) where {T <: AbstractRow} if get(io, :compact, false) || get(io, :limit, false) print(io, "$T: ") show(io, NamedTuple(x)) else println(io, "$T:") names = collect(columnnames(x)) values = [getcolumn(x, nm) for nm in names] Base.print_matrix(io, hcat(names, values)) end end function Base.show(io::IO, table::AbstractColumns; max_cols = 20) ncols = length(columnnames(table)) print(io, "$(typeof(table)) with $(rowcount(table)) rows, $(ncols) columns, and ") sch = schema(table) if sch !== nothing print(io, "schema:\n") show(IOContext(io, :print_schema_header => false), sch) else print(io, "an unknown schema.") end end # AbstractRow AbstractVector as Rows const AbstractRowTable = AbstractVector{T} where {T <: AbstractRow} isrowtable(::Type{<:AbstractRowTable}) = true schema(x::AbstractRowTable) = nothing # AbstractColumns as Columns istable(::Type{<:AbstractColumns}) = true columnaccess(::Type{<:AbstractColumns}) = true columns(x::AbstractColumns) = x schema(x::AbstractColumns) = nothing """ Tables.Row(row) Convenience type to wrap any `AbstractRow` interface object in a dedicated struct to provide useful default behaviors (allows any `AbstractRow` to be used like a `NamedTuple`): * Indexing interface defined; i.e. `row[i]` will return the column value at index `i`, `row[nm]` will return column value for column name `nm` * Property access interface defined; i.e. `row.col1` will retrieve the value for the column named `col1` * Iteration interface defined; i.e. 
`for x in row` will iterate each column value in the row * `AbstractDict` methods defined (`get`, `haskey`, etc.) for checking and retrieving column values """ struct Row{T} <: AbstractRow x::T end Row(x::Row) = x """ Tables.Columns(tbl) Convenience type that calls `Tables.columns` on an input `tbl` and wraps the resulting `AbstractColumns` interface object in a dedicated struct to provide useful default behaviors (allows any `AbstractColumns` to be used like a `NamedTuple` of `Vectors`): * Indexing interface defined; i.e. `row[i]` will return the column at index `i`, `row[nm]` will return column for column name `nm` * Property access interface defined; i.e. `row.col1` will retrieve the value for the column named `col1` * Iteration interface defined; i.e. `for x in row` will iterate each column in the row * `AbstractDict` methods defined (`get`, `haskey`, etc.) for checking and retrieving columns Note that `Tables.Columns` calls `Tables.columns` internally on the provided table argument. `Tables.Columns` can be used for dispatch if needed. """ struct Columns{T} <: AbstractColumns x::T function Columns(x) cols = columns(x) return new{typeof(cols)}(cols) end end Columns(x::Columns) = x # Columns can only wrap something that is a table, so we pass the schema through schema(x::Columns) = schema(getx(x)) const RorC2 = Union{Row, Columns} getx(x::RorC2) = getfield(x, :x) getcolumn(x::RorC2, i::Int) = getcolumn(getx(x), i) getcolumn(x::RorC2, nm::Symbol) = getcolumn(getx(x), nm) getcolumn(x::RorC2, ::Type{T}, i::Int, nm::Symbol) where {T} = getcolumn(getx(x), T, i, nm) columnnames(x::RorC2) = columnnames(getx(x)) """ Tables.istable(x) => Bool Check if an object has specifically defined that it is a table. Note that not all valid tables will return true, since it's possible to satisfy the Tables.jl interface at "run-time", e.g. 
a `Generator` of `NamedTuple`s iterates `NamedTuple`s, which satisfies the `AbstractRow` interface, but there's no static way of knowing that the generator is a table. It is recommended that for users implementing `MyType`, they define only `istable(::Type{MyType})`. `istable(::MyType)` will then automatically delegate to this method. `istable` calls `TableTraits.isiterabletable` as a fallback. This can have a considerable runtime overhead in some contexts. To avoid these and use `istable` as a compile-time trait, it can be called on a type as `istable(typeof(obj))`. """ function istable end istable(x::T) where {T} = istable(T) || TableTraits.isiterabletable(x) === true istable(::Type{T}) where {T} = isrowtable(T) # to avoid ambiguities istable(::Type{T}) where {T <: AbstractVector{Union{}}} = false istable(::AbstractVector{Union{}}) = false """ Tables.rowaccess(x) => Bool Check whether an object has specifically defined that it implements the `Tables.rows` function that does _not_ copy table data. That is to say, `Tables.rows(x)` must be done with O(1) time and space complexity when `Tables.rowaccess(x) == true`. Note that `Tables.rows` will work on any object that iterates `AbstractRow`-compatible objects, even if they don't define `rowaccess`, e.g. a `Generator` of `NamedTuple`s. However, this generic fallback may copy the data from input table `x`. Also note that just because an object defines `rowaccess` doesn't mean a user should call `Tables.rows` on it; `Tables.columns` will also work, providing a valid `AbstractColumns` object from the rows. Hence, users should call `Tables.rows` or `Tables.columns` depending on what is most natural for them to *consume* instead of worrying about what and how the input is oriented. It is recommended that for users implementing `MyType`, they define only `rowaccess(::Type{MyType})`. `rowaccess(::MyType)` will then automatically delegate to this method. 
""" function rowaccess end rowaccess(x::T) where {T} = rowaccess(T) rowaccess(::Type{T}) where {T} = isrowtable(T) """ Tables.columnaccess(x) => Bool Check whether an object has specifically defined that it implements the `Tables.columns` function that does _not_ copy table data. That is to say, `Tables.columns(x)` must be done with O(1) time and space complexity when `Tables.columnaccess(x) == true`. Note that `Tables.columns` has generic fallbacks allowing it to produces `AbstractColumns` objects, even if the input doesn't define `columnaccess`. However, this generic fallback may copy the data from input table `x`. Also note that just because an object defines `columnaccess` doesn't mean a user should call `Tables.columns` on it; `Tables.rows` will also work, providing a valid `AbstractRow` iterator. Hence, users should call `Tables.rows` or `Tables.columns` depending on what is most natural for them to *consume* instead of worrying about what and how the input is oriented. It is recommended that for users implementing `MyType`, they define only `columnaccess(::Type{MyType})`. `columnaccess(::MyType)` will then automatically delegate to this method. """ function columnaccess end columnaccess(x::T) where {T} = columnaccess(T) columnaccess(::Type{T}) where {T} = false """ Tables.schema(x) => Union{Nothing, Tables.Schema} Attempt to retrieve the schema of the object returned by `Tables.rows` or `Tables.columns`. If the `AbstractRow` iterator or `AbstractColumns` object can't determine its schema, `nothing` will be returned. Otherwise, a `Tables.Schema` object is returned, with the column names and types available for use. """ function schema end schema(x) = nothing """ Tables.materializer(x) => Callable For a table input, return the "sink" function or "materializing" function that can take a Tables.jl-compatible table input and make an instance of the table type. 
This enables "transform" workflows that take table inputs, apply transformations, potentially converting the table to a different form, and end with producing a table of the same type as the original input. The default materializer is `Tables.columntable`, which converts any table input into a `NamedTuple` of `Vector`s. It is recommended that for users implementing `MyType`, they define only `materializer(::Type{<:MyType})`. `materializer(::MyType)` will then automatically delegate to this method. """ function materializer end materializer(x::T) where {T} = materializer(T) materializer(::Type{T}) where {T} = columntable """ Tables.columns(x) => AbstractColumns-compatible object Accesses data of input table source `x` by returning an [`AbstractColumns`](@ref)-compatible object, which allows retrieving entire columns by name or index. A retrieved column is a 1-based indexable object that has a known length, i.e. supports `length(col)` and `col[i]` for any `i = 1:length(col)`. Note that even if the input table source is row-oriented by nature, an efficient generic definition of `Tables.columns` is defined in Tables.jl to build a `AbstractColumns`- compatible object object from the input rows. The [`Tables.Schema`](@ref) of a `AbstractColumns` object can be queried via `Tables.schema(columns)`, which may return `nothing` if the schema is unknown. Column names can always be queried by calling `Tables.columnnames(columns)`, and individual columns can be accessed by calling `Tables.getcolumn(columns, i::Int )` or `Tables.getcolumn(columns, nm::Symbol)` with a column index or name, respectively. Note that if `x` is an object in which columns are stored as vectors, the check that these vectors use 1-based indexing is not performed (it should be ensured when `x` is constructed). """ function columns end """ Tables.rows(x) => Row iterator Accesses data of input table source `x` row-by-row by returning an [`AbstractRow`](@ref)-compatible iterator. 
Note that even if the input table source is column-oriented by nature, an efficient generic definition of `Tables.rows` is defined in Tables.jl to return an iterator of row views into the columns of the input. The [`Tables.Schema`](@ref) of an `AbstractRow` iterator can be queried via `Tables.schema(rows)`, which may return `nothing` if the schema is unknown. Column names can always be queried by calling `Tables.columnnames(row)` on an individual row, and row values can be accessed by calling `Tables.getcolumn(row, i::Int )` or `Tables.getcolumn(row, nm::Symbol)` with a column index or name, respectively. See also [`rowtable`](@ref) and [`namedtupleiterator`](@ref). """ function rows end # Schema implementation """ Tables.Schema(names, types) Create a `Tables.Schema` object that holds the column names and types for an `AbstractRow` iterator returned from `Tables.rows` or an `AbstractColumns` object returned from `Tables.columns`. `Tables.Schema` is dual-purposed: provide an easy interface for users to query these properties, as well as provide a convenient "structural" type for code generation. To get a table's schema, one can call `Tables.schema` on the result of `Tables.rows` or `Tables.columns`, but also note that a table may return `nothing`, indicating that its column names and/or column element types are unknown (usually not inferable). This is similar to the `Base.EltypeUnknown()` trait for iterators when `Base.IteratorEltype` is called. Users should account for the `Tables.schema(tbl) => nothing` case by using the properties of the results of `Tables.rows(x)` and `Tables.columns(x)` directly. To access the names, one can simply call `sch.names` to return a collection of Symbols (`Tuple` or `Vector`). To access column element types, one can similarly call `sch.types`, which will return a collection of types (like `(Int64, Float64, String)`). 
The actual type definition is ```julia struct Schema{names, types} storednames::Union{Nothing, Vector{Symbol}} storedtypes::Union{Nothing, Vector{Type}} end ``` Where `names` is a tuple of `Symbol`s or `nothing`, and `types` is a tuple _type_ of types (like `Tuple{Int64, Float64, String}`) or `nothing`. Encoding the names & types as type parameters allows convenient use of the type in generated functions and other optimization use-cases, but users should note that when `names` and/or `types` are the `nothing` value, the names and/or types are stored in the `storednames` and `storedtypes` fields. This is to account for extremely wide tables with columns in the 10s of thousands where encoding the names/types as type parameters becomes prohibitive to the compiler. So while optimizations can be written on the typed `names`/`types` type parameters, users should also consider handling the extremely wide tables by specializing on `Tables.Schema{nothing, nothing}`. """ struct Schema{names, types} storednames::Union{Nothing, Vector{Symbol}} storedtypes::Union{Nothing, Vector{Type}} end Schema{names, types}() where {names, types} = Schema{names, types}(nothing, nothing) Schema(names::Tuple{Vararg{Symbol}}, ::Type{T}) where {T <: Tuple} = Schema{names, T}() Schema(::Type{NamedTuple{names, types}}) where {names, types} = Schema{names, types}() # whether names/types are stored or not stored(::Schema{names, types}) where {names, types} = names === nothing && types === nothing stored(::Nothing) = false # pass through Ints to allow Tuples to act as rows sym(x) = Symbol(x) sym(x::Int) = x Schema(names, ::Nothing) = Schema{Tuple(map(sym, names)), nothing}() const SCHEMA_SPECIALIZATION_THRESHOLD = (2^16) - 1 function Schema(names, types; stored::Bool=false) if stored || length(names) > SCHEMA_SPECIALIZATION_THRESHOLD return Schema{nothing, nothing}([sym(x) for x in names], Type[T for T in types]) else return Schema{Tuple(map(sym, names)), Tuple{types...}}() end end function 
Base.show(io::IO, sch::Schema) get(io, :print_schema_header, true) && println(io, "Tables.Schema:") nms = sch.names Base.print_matrix(io, hcat(nms isa Vector ? nms : collect(nms), sch.types === nothing ? fill(nothing, length(nms)) : collect(sch.types))) end function Base.getproperty(sch::Schema{names, types}, field::Symbol) where {names, types} if field === :names return names === nothing ? getfield(sch, :storednames) : names elseif field === :types if types === nothing return getfield(sch, :storedtypes) else ncol = fieldcount(types) if ncol <= 512 # Type stable, but slower to compile return ntuple(i -> fieldtype(types, i), Val(ncol)) else return Tuple(fieldtype(types, i) for i=1:ncol) end end else throw(ArgumentError("unsupported property for Tables.Schema")) end end Base.propertynames(::Schema) = (:names, :types) ==(a::Schema, b::Schema) = a.names == b.names && a.types == b.types # partitions """ Tables.partitions(x) Request a "table" iterator from `x`. Each iterated element must be a "table" in the sense that one may call `Tables.rows` or `Tables.columns` to get a row-iterator or collection of columns. All iterated elements _must_ have identical schema, so that users may call `Tables.schema(first_element)` on the first iterated element and know that each subsequent iteration will match the same schema. The default definition is: ```julia Tables.partitions(x) = (x,) ``` So that any input is assumed to be a single "table". This means users should feel free to call `Tables.partitions` anywhere they're currently calling `Tables.columns` or `Tables.rows`, and get back an iterator of those instead. In other words, "sink" functions can use `Tables.partitions` whether or not the user passes a partionable table, since the default is to treat a single input as a single, non-partitioned table. 
[`Tables.partitioner(itr)`](@ref) is a convenience wrapper to provide table partitions from any table iterator; this allows for easy wrapping of a `Vector` or iterator of tables as valid partitions, since by default, they'd be treated as a single table. A 2nd convenience method is provided with the definition: ```julia Tables.partitions(x...) = x ``` That allows passing vararg tables and they'll be treated as separate partitions. Sink functions may allow vararg table inputs and can "splat them through" to `partitions`. For convenience, `Tables.partitions(x::Iterators.PartitionIterator) = x` and `Tables.partitions(x::Tables.Partitioner) = x` are defined to handle cases where user created partitioning with the `Iterators.partition` or [`Tables.partitioner`](@ref) functions. """ partitions(x) = (x,) partitions(x...) = x partitions(x::Iterators.PartitionIterator) = x """ Tables.LazyTable(f, arg) A "table" type that delays materialization until `Tables.columns` or `Tables.rows` is called. This allows, for example, sending a `LazyTable` to a remote process or thread which can then call `Tables.columns` or `Tables.rows` to "materialize" the table. Is used by default in `Tables.partitioner(f, itr)` where a materializer function `f` is passed to each element of an iterable `itr`, allowing distributed/concurrent patterns like: ```julia for tbl in Tables.partitions(Tables.partitioner(CSV.File, list_of_csv_files)) Threads.@spawn begin cols = Tables.columns(tbl) # do stuff with cols end end ``` In this example, `CSV.File` will be called like `CSV.File(x)` for each element of the `list_of_csv_files` iterable, but _not until_ `Tables.columns(tbl)` is called, which in this case happens in a thread-spawned task, allowing files to be parsed and processed in parallel. 
""" struct LazyTable{F, T} f::F x::T end columns(x::LazyTable) = columns(x.f(x.x)) rows(x::LazyTable) = rows(x.f(x.x)) struct Partitioner{T} x::T end """ Tables.subset(x, inds; viewhint=nothing) Return one or more rows from table `x` according to the position(s) specified by `inds`: - If `inds` is a single non-boolean integer return a row object. - If `inds` is a vector of non-boolean integers, a vector of booleans, or a `:`, return a subset of the original table according to the indices. In this case, the returned type is not necessarily the same as the original table type. If other types of `inds` are passed than specified above the behavior is undefined. The `viewhint` argument tries to influence whether the returned object is a view of the original table or an independent copy: - If `viewhint=nothing` (the default) then the implementation for a specific table type is free to decide whether to return a copy or a view. - If `viewhint=true` then a view is returned and if `viewhint=false` a copy is returned. This applies both to returning a row or a table. Any specialized implementation of `subset` must support the `viewhint=nothing` argument. Support for `viewhint=true` or `viewhint=false` is optional (i.e. implementations may ignore the keyword argument and return a view or a copy regardless of `viewhint` value). """ function subset(x::T, inds; viewhint::Union{Bool, Nothing}=nothing, view::Union{Bool, Nothing}=nothing) where {T} if view !== nothing @warn "`view` keyword argument is deprecated for `Tables.subset`, use `viewhint` instead" viewhint = view end # because this method is being called, we know `x` didn't define it's own Tables.subset # first check if it supports column access, and if so, apply inds and wrap columns in a DictColumnTable if columnaccess(x) cols = columns(x) if inds isa Integer return ColumnsRow(cols, inds) else ret = viewhint === true ? 
_map(c -> Base.view(c, inds), cols) : _map(c -> c[inds], cols) return DictColumnTable(schema(cols), ret) end end # otherwise, let's get the rows and see if we can apply inds to them r = rows(x) if r isa AbstractVector inds isa Integer && return r[inds] ret = viewhint === true ? Base.view(x, inds) : x[inds] (ret isa AbstractVector) || throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `AbstractVector` output, got $(typeof(ret))")) return ret end throw(ArgumentError("no default `Tables.subset` implementation for type: $T")) end vectorcheck(x::AbstractVector) = x vectorcheck(x) = throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `AbstractVector` output, got $(typeof(x))")) _map(f, cols) = OrderedDict(nm => vectorcheck(f(getcolumn(cols, nm))) for nm in columnnames(cols)) """ Tables.partitioner(f, itr) Tables.partitioner(x) Convenience methods to generate table iterators. The first method takes a "materializer" function `f` and an iterator `itr`, and will call `Tables.LazyTable(f, x) for x in itr` for each iteration. This allows delaying table materialization until `Tables.columns` or `Tables.rows` are called on the `LazyTable` object (which will call `f(x)`). This allows a common desired pattern of materializing and processing a table on a remote process or thread, like: ```julia for tbl in Tables.partitions(Tables.partitioner(CSV.File, list_of_csv_files)) Threads.@spawn begin cols = Tables.columns(tbl) # do stuff with cols end end ``` The second method is provided because the default behavior of `Tables.partition(x)` is to treat `x` as a single, non-partitioned table. This method allows users to easily wrap a `Vector` or generator of tables as table partitions to pass to sink functions able to utilize `Tables.partitions`. 
""" partitioner(x) = Partitioner(x) partitioner(f, itr) = partitioner((LazyTable(f, x) for x in itr)) partitions(x::Partitioner) = x Base.IteratorEltype(::Type{P}) where {S,P<:Partitioner{S}} = Base.IteratorEltype(S) Base.eltype(::Type{P}) where {S,P<:Partitioner{S}} = eltype(S) Base.IteratorSize(::Type{P}) where {S,P<:Partitioner{S}} = Base.IteratorSize(S) Base.length(x::Partitioner) = length(x.x) Base.size(x::Partitioner) = size(x.x) Base.iterate(x::Partitioner, st...) = iterate(x.x, st...) const SPECIALIZATION_THRESHOLD = 100 # reference implementations: Vector of NamedTuples and NamedTuple of Vectors include("namedtuples.jl") # helper functions include("utils.jl") # generic fallback definitions include("fallbacks.jl") # allow any valid iterator to be a table include("tofromdatavalues.jl") # matrix integration include("matrix.jl") # dict tables include("dicts.jl") """ Tables.columnindex(table, name::Symbol) Return the column index (1-based) of a column by `name` in a table with a known schema; returns 0 if `name` doesn't exist in table """ columnindex(table, colname::Symbol) = columnindex(schema(table), colname) """ Tables.columntype(table, name::Symbol) Return the column element type of a column by `name` in a table with a known schema; returns Union{} if `name` doesn't exist in table """ columntype(table, colname::Symbol) = columntype(schema(table), colname) Base.@pure columnindex(::Schema{names, types}, name::Symbol) where {names, types} = columnindex(names, name) "given names and a Symbol `name`, compute the index (1-based) of the name in names" Base.@pure function columnindex(names::Tuple{Vararg{Symbol}}, name::Symbol) i = 1 for nm in names nm === name && return i i += 1 end return 0 end Base.@pure columntype(::Schema{names, types}, name::Symbol) where {names, types} = columntype(names, types, name) "given tuple type and a Symbol `name`, compute the type of the name in the tuples types" Base.@pure function columntype(names::Tuple{Vararg{Symbol}}, 
::Type{types}, name::Symbol) where {types <: Tuple} i = 1 for nm in names nm === name && return fieldtype(types, i) i += 1 end return Union{} end end # module 3/opt/julia/packages/Tables/NSGZI/src/namedtuples.jlk# Vector of NamedTuples const RowTable{T} = AbstractVector{T} where {T <: NamedTuple} # interface implementation isrowtable(::Type{<:RowTable}) = true schema(x::AbstractVector{NamedTuple{names, types}}) where {names, types} = Schema(names, types) materializer(x::RowTable) = rowtable # struct to transform `Row`s into NamedTuples struct NamedTupleIterator{schema, T} x::T end """ Tables.namedtupleiterator(x) Pass any table input source and return a `NamedTuple` iterator See also [`rows`](@ref) and [`rowtable`](@ref). Not for use with extremely wide tables with # of columns > 67K; current fundamental compiler limits prevent constructing `NamedTuple`s that large. """ function namedtupleiterator(x) r = rows(x) sch = schema(r) stored(sch) && throw(ArgumentError("input table too wide ($(length(sch.names)) columns) to construct `NamedTuple` rows")) return NamedTupleIterator{typeof(sch), typeof(r)}(r) end namedtupleiterator(::Type{T}, x) where {T <: NamedTuple} = x namedtupleiterator(T, x) = namedtupleiterator(x) Base.IteratorEltype(::Type{NT}) where {names, types, T, NT<:NamedTupleIterator{Schema{names, types}, T}} = Base.HasEltype() Base.IteratorEltype(::Type{NT}) where {T, NT<:NamedTupleIterator{Nothing, T}} = Base.EltypeUnknown() Base.eltype(::Type{NT}) where {names, types, T, NT<:NamedTupleIterator{Schema{names, types}, T}} = NamedTuple{map(Symbol, names), types} Base.IteratorSize(::Type{NT}) where {sch, T, NT<:NamedTupleIterator{sch, T}} = Base.IteratorSize(T) Base.length(nt::NamedTupleIterator) = length(nt.x) Base.size(nt::NamedTupleIterator) = (length(nt.x),) @inline function _iterate(rows::NamedTupleIterator{Schema{names, T}}, st=()) where {names, T} # use of @generated justified because it's user-controlled; they explicitly asked for vector of namedtuples 
if @generated vals = Any[ :(getcolumn(row, $(fieldtype(T, i)), $i, $(quot(names[i])))) for i = 1:fieldcount(T) ] ret = Expr(:new, :(NamedTuple{names, T}), vals...) return quote x = iterate(rows.x, st...) x === nothing && return nothing row, st = x return $ret, (st,) end else x = iterate(rows.x, st...) x === nothing && return nothing row, st = x return NamedTuple{map(Symbol, names), T}(Tuple(getcolumn(row, fieldtype(T, i), i, names[i]) for i = 1:fieldcount(T))), (st,) end end @inline function Base.iterate(rows::NamedTupleIterator{Schema{names, T}}, st=()) where {names, T} if fieldcount(T) <= SPECIALIZATION_THRESHOLD return _iterate(rows, st) else x = iterate(rows.x, st...) x === nothing && return nothing row, st = x return NamedTuple{map(Symbol, names), T}(Tuple(getcolumn(row, fieldtype(T, i), i, names[i]) for i = 1:fieldcount(T))), (st,) end end function Base.iterate(rows::NamedTupleIterator{Nothing}) x = iterate(rows.x) x === nothing && return nothing row, st = x names = Tuple(columnnames(row)) return NamedTuple{names}(Tuple(getcolumn(row, nm) for nm in names)), (Val(names), (st,)) end function Base.iterate(rows::NamedTupleIterator{Nothing}, state::Tuple{Val{names}, T}) where {names, T} x = iterate(rows.x, state[2]...) x === nothing && return nothing row, st = x return NamedTuple{names}(Tuple(getcolumn(row, nm) for nm in names)), (Val(names), (st,)) end # sink function """ Tables.rowtable(x) => Vector{NamedTuple} Take any input table source, and produce a `Vector` of `NamedTuple`s, also known as a "row table". A "row table" is a kind of default table type of sorts, since it satisfies the Tables.jl row interface naturally, i.e. a `Vector` naturally iterates its elements, and `NamedTuple` satisfies the `AbstractRow` interface by default (allows indexing value by index, name, and getting all names). For a lazy iterator over rows see [`rows`](@ref) and [`namedtupleiterator`](@ref). 
Not for use with extremely wide tables with # of columns > 67K; current fundamental compiler limits prevent constructing `NamedTuple`s that large. """ function rowtable end function rowtable(itr::T) where {T} r = rows(itr) return collect(namedtupleiterator(eltype(r), r)) end # NamedTuple of arrays of matching dimensionality const ColumnTable = NamedTuple{names, T} where {names, T <: NTuple{N, AbstractVector}} where {N} rowcount(c::ColumnTable) = length(c) == 0 ? 0 : length(c[1]) function subset(x::ColumnTable, inds; viewhint::Union{Bool,Nothing}=nothing, view::Union{Bool,Nothing}=nothing) if view !== nothing @warn "`view` keyword argument is deprecated for `Tables.subset`, use `viewhint` instead" viewhint = view end if inds isa Integer return map(c -> c[inds], x) else return viewhint === true ? map(c -> vectorcheck(Base.view(c, inds)), x) : map(c -> vectorcheck(c[inds]), x) end end # interface implementation istable(::Type{<:ColumnTable}) = true columnaccess(::Type{<:ColumnTable}) = true # a NamedTuple of AbstractVectors is itself a `Columns` object columns(x::ColumnTable) = x _eltype(::Type{A}) where {T, A <: AbstractVector{T}} = T Base.@pure function _eltypes(::Type{NT}) where {NT <: ColumnTable} return Tuple{Any[ _eltype(fieldtype(NT, i)) for i = 1:fieldcount(NT) ]...} end names(::Type{NT}) where {nms, T, NT<:NamedTuple{nms, T}} = nms types(::Type{NT}) where {nms, T, NT<:NamedTuple{nms, T}} = T schema(x::T) where {T <: ColumnTable} = Schema(names(T), _eltypes(T)) materializer(x::ColumnTable) = columntable getarray(x::AbstractArray) = x getarray(x) = collect(x) """ Tables.columntable(x) => NamedTuple of AbstractVectors Takes any input table source `x` and returns a `NamedTuple` of `AbstractVector`s, also known as a "column table". A "column table" is a kind of default table type of sorts, since it satisfies the Tables.jl column interface naturally. 
Note that if `x` is an object in which columns are stored as vectors, the check that these vectors use 1-based indexing is not performed (it should be ensured when `x` is constructed). Not for use with extremely wide tables with # of columns > 67K; current fundamental compiler limits prevent constructing `NamedTuple`s that large. """ function columntable end function _columntable(sch::Schema{names, types}, cols) where {names, types} # use of @generated justified because it's user-controlled; they explicitly asked for namedtuple of vectors if @generated vals = Tuple(:(getarray(getcolumn(cols, $(fieldtype(types, i)), $i, $(quot(names[i]))))) for i = 1:fieldcount(types)) return :(NamedTuple{map(Symbol, names)}(($(vals...),))) else return NamedTuple{map(Symbol, names)}(Tuple(getarray(getcolumn(cols, fieldtype(types, i), i, names[i])) for i = 1:fieldcount(types))) end end function columntable(sch::Schema{names, types}, cols) where {names, types} if fieldcount(types) <= SPECIALIZATION_THRESHOLD return _columntable(sch, cols) else return NamedTuple{map(Symbol, names)}(Tuple(getarray(getcolumn(cols, fieldtype(types, i), i, names[i])) for i = 1:fieldcount(types))) end end # extremely large tables columntable(sch::Schema{nothing, nothing}, cols) = throw(ArgumentError("input table too wide ($(length(sch.names)) columns) to convert to `NamedTuple` of `AbstractVector`s")) # unknown schema case columntable(::Nothing, cols) = NamedTuple{Tuple(map(Symbol, columnnames(cols)))}(Tuple(getarray(getcolumn(cols, col)) for col in columnnames(cols))) function columntable(itr::T) where {T} cols = columns(itr) cols isa ColumnTable && return cols return columntable(schema(cols), cols) end columntable(x::ColumnTable) = x # implement default nrow and ncol methods for DataAPI.jl DataAPI.nrow(table::ColumnTable) = isempty(table) ? 
0 : length(first(table)) DataAPI.ncol(table::ColumnTable) = length(table) DataAPI.nrow(table::RowTable) = length(table) DataAPI.ncol(table::RowTable) = isempty(table) ? 0 : length(first(table)) -/opt/julia/packages/Tables/NSGZI/src/utils.jl""helper function to calculate a run-length encoding of a tuple type" Base.@pure function runlength(::Type{T}) where {T <: Tuple} rle = Tuple{Type, Int}[] fieldcount(T) == 0 && return rle curT = fieldtype(T, 1) prevT = curT len = 1 for i = 2:fieldcount(T) @inbounds curT = fieldtype(T, i) if curT === prevT len += 1 else push!(rle, (prevT, len)) prevT = curT len = 1 end end push!(rle, (curT, len)) return rle end """ Tables.eachcolumn(f, sch::Tables.Schema{names, types}, x::Union{Tables.AbstractRow, Tables.AbstractColumns}) Tables.eachcolumn(f, sch::Tables.Schema{names, nothing}, x::Union{Tables.AbstractRow, Tables.AbstractColumns}) Takes a function `f`, table schema `sch`, `x`, which is an object that satisfies the `AbstractRow` or `AbstractColumns` interfaces; it generates calls to get the value for each column (`Tables.getcolumn(x, nm)`) and then calls `f(val, index, name)`, where `f` is the user-provided function, `val` is the column value (`AbstractRow`) or entire column (`AbstractColumns`), `index` is the column index as an `Int`, and `name` is the column name as a `Symbol`. 
An example using `Tables.eachcolumn` is: ```julia rows = Tables.rows(tbl) sch = Tables.schema(rows) if sch === nothing state = iterate(rows) state === nothing && return row, st = state sch = Tables.schema(Tables.columnnames(row), nothing) while state !== nothing Tables.eachcolumn(sch, row) do val, i, nm bind!(stmt, i, val) end state = iterate(rows, st) state === nothing && return row, st = state end else for row in rows Tables.eachcolumn(sch, row) do val, i, nm bind!(stmt, i, val) end end end ``` Note in this example we account for the input table potentially returning `nothing` from `Tables.schema(rows)`; in that case, we start iterating the rows, and build a partial schema using the column names from the first row `sch = Tables.schema(Tables.columnnames(row), nothing)`, which is valid to pass to `Tables.eachcolumn`. """ function eachcolumn end quot(s::Symbol) = Meta.QuoteNode(s) quot(x::Int) = x @inline function eachcolumn(f::F, sch::Schema{names, types}, row::T) where {F, names, types, T} N = fieldcount(types) if N <= SPECIALIZATION_THRESHOLD Base.@nexprs 100 i -> begin if i <= N f(getcolumn(row, fieldtype(types, i), i, names[i]), i, names[i]) end end else for (i, nm) in enumerate(names) f(getcolumn(row, fieldtype(types, i), i, nm), i, nm) end end return end @inline function eachcolumn(f::F, sch::Schema{names, nothing}, row::T) where {F, names, T} N = length(names) if N <= SPECIALIZATION_THRESHOLD Base.@nexprs 100 i -> begin if i <= N f(getcolumn(row, names[i]), i, names[i]) end end else for (i, nm) in enumerate(names) f(getcolumn(row, nm), i, nm) end end return end @inline function eachcolumn(f::F, sch::Schema{nothing, nothing}, row::T) where {F, T} for (i, nm) in enumerate(sch.names) f(getcolumn(row, nm), i, nm) end return end # these are specialized `eachcolumn`s where we also want # the indexing of `columns` to be constant propagated, so it needs to be returned from the generated function @inline function eachcolumns(f::F, sch::Schema{names, types}, row::T, 
columns::S, args...) where {F, names, types, T, S} N = fieldcount(types) if N <= SPECIALIZATION_THRESHOLD Base.@nexprs 100 i -> begin if i <= N f(getcolumn(row, fieldtype(types, i), i, names[i]), i, names[i], columns[i], args...) end end else for (i, nm) in enumerate(names) f(getcolumn(row, fieldtype(types, i), i, nm), i, nm, columns[i], args...) end end return end @inline function eachcolumns(f::F, sch::Schema{names, nothing}, row::T, columns::S, args...) where {F, names, T, S} N = length(names) if N <= SPECIALIZATION_THRESHOLD Base.@nexprs 100 i -> begin if i <= N f(getcolumn(row, names[i]), i, names[i], columns[i], args...) end end else for (i, nm) in enumerate(names) f(getcolumn(row, nm), i, nm, columns[i], args...) end end return end @inline function eachcolumns(f::F, sch::Schema{nothing, nothing}, row::T, columns::S, args...) where {F, T, S} for (i, nm) in enumerate(sch.names) f(getcolumn(row, nm), i, nm, columns[i], args...) end return end """ rowmerge(row, other_rows...) rowmerge(row; fields_to_merge...) Return a `NamedTuple` by merging `row` (an `AbstractRow`-compliant value) with `other_rows` (one or more `AbstractRow`-compliant values) via `Base.merge`. This function is similar to `Base.merge(::NamedTuple, ::NamedTuple...)`, but accepts `AbstractRow`-compliant values instead of `NamedTuple`s. A convenience method `rowmerge(row; fields_to_merge...) = rowmerge(row, fields_to_merge)` is defined that enables the `fields_to_merge` to be specified as keyword arguments. """ rowmerge(row, other) = merge(_row_to_named_tuple(row), _row_to_named_tuple(other)) rowmerge(row, other, more...) = merge(_row_to_named_tuple(row), rowmerge(other, more...)) rowmerge(row; fields_to_merge...) = rowmerge(row, values(fields_to_merge)) _row_to_named_tuple(row::NamedTuple) = row _row_to_named_tuple(row) = NamedTuple(Row(row)) """ ByRow <: Function `ByRow(f)` returns a function which applies function `f` to each element in a vector. 
`ByRow(f)` can be passed two types of arguments: - One or more 1-based `AbstractVector`s of equal length: In this case the returned value is a vector resulting from applying `f` to elements of passed vectors element-wise. Function `f` is called exactly once for each element of passed vectors (as opposed to `map` which assumes for some types of source vectors (e.g. `SparseVector`) that the wrapped function is pure, and may call the function `f` only once for multiple equal values. - A `Tables.ColumnTable` holding 1-based columns of equal length: In this case the function `f` is passed a `NamedTuple` created for each row of passed table. The return value of `ByRow(f)` is always a vector. `ByRow` expects that at least one argument is passed to it and in the case of `Tables.ColumnTable` passed that the table has at least one column. In some contexts of operations on tables (for example `DataFrame`) the user might want to pass no arguments (or an empty `Tables.ColumnTable`) to `ByRow`. This case must be separately handled by the code implementing the logic of processing the `ByRow` operation on this specific parent table (the reason is that passing such arguments to `ByRow` does not allow it to determine the number of rows of the source table). # Examples ``` julia> Tables.ByRow(x -> x^2)(1:3) 3-element Vector{Int64}: 1 4 9 julia> Tables.ByRow((x, y) -> x*y)(1:3, 2:4) 3-element Vector{Int64}: 2 6 12 julia> Tables.ByRow(x -> x.a)((a=1:2, b=3:4)) 2-element Vector{Int64}: 1 2 julia> Tables.ByRow(x -> (a=x.a*2, b=sin(x.b), c=x.c))((a=[1, 2, 3], b=[1.2, 3.4, 5.6], c=["a", "b", "c"])) 3-element Vector{NamedTuple{(:a, :b, :c), Tuple{Int64, Float64, String}}}: (a = 2, b = 0.9320390859672263, c = "a") (a = 4, b = -0.2555411020268312, c = "b") (a = 6, b = -0.6312666378723216, c = "c") ``` """ struct ByRow{T} <: Function fun::T end # invoke the generic AbstractVector function to ensure function is called # exactly once for each element function (f::ByRow)(cols::AbstractVector...) 
if !(all(col -> ==(length(first(cols)))(length(col)) && firstindex(col) == 1, cols)) throw(ArgumentError("All passed vectors must have the same length and use 1-based indexing")) end return invoke(map, Tuple{typeof(f.fun), ntuple(i -> AbstractVector, length(cols))...}, f.fun, cols...) end function (f::ByRow)(table::ColumnTable) if !(all(col -> ==(length(first(table)))(length(col)) && firstindex(col) == 1, table)) throw(ArgumentError("All passed vectors must have the same length and use 1-based indexing")) end return [f.fun(nt) for nt in Tables.namedtupleiterator(table)] end (f::ByRow)() = throw(ArgumentError("no arguments passed")) (f::ByRow)(::NamedTuple{(), Tuple{}}) = throw(ArgumentError("no columns passed in Tables.ColumnTable")) 1/opt/julia/packages/Tables/NSGZI/src/fallbacks.jl/## generic `Tables.rows` and `Tables.columns` fallbacks ## if a table provides Tables.rows or Tables.columns, ## we'll provide a default implementation of the other # Turn any AbstractColumns into an AbstractRow iterator # get the number of rows in the incoming table function rowcount(cols) names = columnnames(cols) isempty(names) && return 0 return length(getcolumn(cols, names[1])) end # a lazy row view into a AbstractColumns object struct ColumnsRow{T} <: AbstractRow columns::T # an `AbstractColumns`-compatible object row::Int # row number end getcolumns(c::ColumnsRow) = getfield(c, :columns) getrow(c::ColumnsRow) = getfield(c, :row) # AbstractRow interface Base.@propagate_inbounds getcolumn(c::ColumnsRow, ::Type{T}, col::Int, nm::Symbol) where {T} = getcolumn(getcolumns(c), T, col, nm)[getrow(c)] Base.@propagate_inbounds getcolumn(c::ColumnsRow, i::Int) = getcolumn(getcolumns(c), i)[getrow(c)] Base.@propagate_inbounds getcolumn(c::ColumnsRow, nm::Symbol) = getcolumn(getcolumns(c), nm)[getrow(c)] columnnames(c::ColumnsRow) = columnnames(getcolumns(c)) @generated function Base.isless(c::ColumnsRow{T}, d::ColumnsRow{T}) where {names, T <: NamedTuple{names}} exprs = Expr[] for n in 
names var1 = Expr(:., :c, QuoteNode(n)) var2 = Expr(:., :d, QuoteNode(n)) bl = quote a, b = $var1, $var2 isless(a, b) && return true isequal(a, b) || return false end push!(exprs, bl) end push!(exprs, :(return false)) Expr(:block, exprs...) end @generated function Base.isequal(c::ColumnsRow{T}, d::ColumnsRow{T}) where {names, T <: NamedTuple{names}} exprs = Expr[] for n in names var1 = Expr(:., :c, QuoteNode(n)) var2 = Expr(:., :d, QuoteNode(n)) push!(exprs, :(isequal($var1, $var2) || return false)) end push!(exprs, :(return true)) Expr(:block, exprs...) end # RowIterator wraps an AbstractColumns object and provides row iteration via lazy row views struct RowIterator{T} columns::T len::Int end Base.eltype(::Type{R}) where {T,R<:RowIterator{T}} = ColumnsRow{T} Base.length(x::RowIterator) = getfield(x, :len) Base.getproperty(x::RowIterator, nm::Symbol) = getcolumn(x, nm) Base.getproperty(x::RowIterator, i::Int) = getcolumn(x, i) Base.propertynames(x::RowIterator) = columnnames(x) isrowtable(::Type{<:RowIterator}) = true columnaccess(::Type{<:RowIterator}) = true columns(x::RowIterator) = getfield(x, :columns) columnnames(x::RowIterator) = columnnames(columns(x)) getcolumn(x::RowIterator, nm::Symbol) = getcolumn(columns(x), nm) getcolumn(x::RowIterator, i::Int) = getcolumn(columns(x), i) materializer(x::RowIterator) = materializer(columns(x)) schema(x::RowIterator) = schema(columns(x)) @inline function Base.iterate(rows::RowIterator, st=1) st > length(rows) && return nothing return ColumnsRow(columns(rows), st), st + 1 end # this is our generic Tables.rows fallback definition @noinline nodefault(T) = throw(ArgumentError("no default `Tables.rows` implementation for type: $T")) rows(x::T) where {T} = _rows(x) # split out so we can re-use it in the matrix fallback function _rows(x::T) where {T} isrowtable(x) && return x # because this method is being called, we know `x` didn't define it's own Tables.rows # first check if it supports column access, and if so, wrap it in a 
RowIterator if columnaccess(x) cols = columns(x) return RowIterator(cols, Int(rowcount(cols))) # otherwise, if the input is at least iterable, we'll wrap it in an IteratorWrapper # which will iterate the input, validating that elements support the AbstractRow interface # and unwrapping any DataValues that are encountered elseif IteratorInterfaceExtensions.isiterable(x) return nondatavaluerows(x) end nodefault(T) end rows(::Type{T}) where {T} = throw(ArgumentError("no `Tables.rows` implementation for: $T. `Tables.rows` expects to work on a table _instance_ rather than a _type_.")) # for AbstractRow iterators, we define a "collect"-like routine to build up columns from iterated rows """ Tables.allocatecolumn(::Type{T}, len) => returns a column type (usually `AbstractVector`) with size to hold `len` elements Custom column types can override with an appropriate "scalar" element type that should dispatch to their column allocator. Alternatively, and more generally, custom scalars can overload `DataAPI.defaultarray` to signal the default array type. In this case the signaled array type must support a constructor accepting `undef` for initialization. """ function allocatecolumn(T, len) a = DataAPI.defaultarray(T, 1)(undef, len) Missing <: T && fill!(a, missing) return a end @inline function _allocatecolumns(::Schema{names, types}, len) where {names, types} if @generated vals = Tuple(:(allocatecolumn($(fieldtype(types, i)), len)) for i = 1:fieldcount(types)) return :(NamedTuple{$(map(Symbol, names))}(($(vals...),))) else return NamedTuple{map(Symbol, names)}(Tuple(allocatecolumn(fieldtype(types, i), len) for i = 1:fieldcount(types))) end end @inline function allocatecolumns(sch::Schema{names, types}, len) where {names, types} if fieldcount(types) <= SPECIALIZATION_THRESHOLD return _allocatecolumns(sch, len) else return NamedTuple{map(Symbol, names)}(Tuple(allocatecolumn(fieldtype(types, i), len) for i = 1:fieldcount(types))) end end # add! will push! or setindex! 
a value depending on if the row-iterator HasLength or not @inline add!(val, col::Int, nm, dest::AbstractArray, ::Union{Base.HasLength, Base.HasShape}, row) = setindex!(dest, val, row) @inline add!(val, col::Int, nm, dest::AbstractArray, L, row) = push!(dest, val) @inline function buildcolumns(schema, rowitr::T) where {T} L = Base.IteratorSize(T) len = Base.haslength(T) ? length(rowitr) : 0 nt = allocatecolumns(schema, len) for (i, row) in enumerate(rowitr) eachcolumns(add!, schema, row, nt, L, i) end return nt end @inline add!(dest::AbstractArray, val, ::Union{Base.HasLength, Base.HasShape}, row) = setindex!(dest, val, row) @inline add!(dest::AbstractArray, val, T, row) = push!(dest, val) @inline function add_or_widen!(val, col::Int, nm, dest::AbstractArray{T}, row, updated, L) where {T} if val isa T || promote_type(typeof(val), T) <: T add!(dest, val, L, row) return else new = allocatecolumn(promote_type(T, typeof(val)), length(dest)) row > 1 && copyto!(new, 1, dest, 1, row - 1) add!(new, val, L, row) updated[] = ntuple(i->i == col ? new : updated[][i], length(updated[])) return end end function __buildcolumns(rowitr, st, sch, columns, rownbr, updated) while true state = iterate(rowitr, st) state === nothing && break row, st = state rownbr += 1 eachcolumns(add_or_widen!, sch, row, columns, rownbr, updated, Base.IteratorSize(rowitr)) # little explanation here: we just called add_or_widen! for each column value of our row # note that when a column's type is widened, `updated` is set w/ the new set of columns # we then check if our current `columns` isn't the same object as our `updated` ref # if it isn't, we're going to call __buildcolumns again, passing our new updated ref as # columns, which allows __buildcolumns to specialize (i.e. recompile) based on the new types # of updated. So a new __buildcolumns will be compiled for each widening event. 
columns !== updated[] && return __buildcolumns(rowitr, st, sch, updated[], rownbr, updated) end return updated end # for the schema-less case, we do one extra step of initializing each column as an `EmptyVector` # and doing an initial widening for each column in _buildcolumns, before passing the widened # set of columns on to __buildcolumns struct EmptyVector <: AbstractVector{Union{}} len::Int end Base.IndexStyle(::Type{EmptyVector}) = Base.IndexLinear() Base.size(x::EmptyVector) = (x.len,) Base.getindex(x::EmptyVector, i::Int) = throw(UndefRefError()) function _buildcolumns(rowitr, row, st, sch, columns, updated) eachcolumns(add_or_widen!, sch, row, columns, 1, updated, Base.IteratorSize(rowitr)) return __buildcolumns(rowitr, st, sch, updated[], 1, updated) end if isdefined(Base, :fieldtypes) _fieldtypes = fieldtypes else _fieldtypes(T) = (fieldtype(T, i) for i = 1:fieldcount(T)) end # when Tables.schema(x) === nothing @inline function buildcolumns(::Nothing, rowitr::T) where {T} state = iterate(rowitr) if state === nothing # empty input iterator; check if it has eltype and maybe we can return a better typed empty NamedTuple if Base.IteratorEltype(rowitr) == Base.HasEltype() WT = wrappedtype(eltype(rowitr)) if WT <: Tuple return allocatecolumns(Schema((Symbol("Column$i") for i = 1:fieldcount(WT)), _fieldtypes(WT)), 0) elseif isconcretetype(WT) && fieldcount(WT) > 0 return allocatecolumns(Schema(fieldnames(WT), _fieldtypes(WT)), 0) end end return NamedTuple() end row, st = state names = Tuple(columnnames(row)) len = Base.haslength(T) ? length(rowitr) : 0 sch = Schema(names, nothing) columns = Tuple(EmptyVector(len) for _ = 1:length(names)) return NamedTuple{map(Symbol, names)}(_buildcolumns(rowitr, row, st, sch, columns, Ref{Any}(columns))[]) end """ Tables.CopiedColumns For some sinks, there's a concern about whether they can safely "own" columns from the input. 
If mutation will be allowed, to be safe, they should always copy input columns, to avoid unintended mutation to the original source. When we've called `buildcolumns`, however, Tables.jl essentially built/owns the columns, and it's happy to pass ownership to the sink. Thus, any built columns will be wrapped in a `CopiedColumns` struct to signal to the sink that essentially "a copy has already been made" and they're safe to assume ownership. """ struct CopiedColumns{T} <: AbstractColumns x::T end source(x::CopiedColumns) = getfield(x, :x) istable(::Type{<:CopiedColumns}) = true columnaccess(::Type{<:CopiedColumns}) = true columns(x::CopiedColumns) = x schema(x::CopiedColumns) = schema(source(x)) materializer(x::CopiedColumns) = materializer(source(x)) getcolumn(x::CopiedColumns, ::Type{T}, col::Int, nm::Symbol) where {T} = getcolumn(source(x), T, col, nm) getcolumn(x::CopiedColumns, i::Int) = getcolumn(source(x), i) getcolumn(x::CopiedColumns, nm::Symbol) = getcolumn(source(x), nm) columnnames(x::CopiedColumns) = columnnames(source(x)) # here's our generic fallback Tables.columns definition @inline columns(x::T) where {T} = _columns(x) @inline function _columns(x::T) where {T} # because this method is being called, we know `x` didn't define it's own Tables.columns method # first check if it explicitly supports row access, and if so, build up the desired columns if rowaccess(x) r = rows(x) return CopiedColumns(buildcolumns(schema(r), r)) # though not widely supported, if a source supports the TableTraits column interface, use it elseif TableTraits.supports_get_columns_copy_using_missing(x) return CopiedColumns(TableTraits.get_columns_copy_using_missing(x)) # otherwise, if the source is at least iterable, we'll wrap it in an IteratorWrapper and # build columns from that, which will check if the source correctly iterates valid AbstractRow objects # and unwraps DataValues for us elseif IteratorInterfaceExtensions.isiterable(x) iw = nondatavaluerows(x) return 
CopiedColumns(buildcolumns(schema(iw), iw)) end throw(ArgumentError("no default `Tables.columns` implementation for type: $T")) end # implement default nrow and ncol methods for DataAPI.jl # this covers also MatrixTable DataAPI.nrow(table::AbstractColumns) = rowcount(table) DataAPI.ncol(table::AbstractColumns) = length(columnnames(table)) DataAPI.nrow(table::AbstractRowTable) = length(table) DataAPI.ncol(table::AbstractRowTable) = isempty(table) ? 0 : length(columnnames(first(table))) 8/opt/julia/packages/Tables/NSGZI/src/tofromdatavalues.jlBase.@pure function nondatavaluenamedtuple(::Type{NT}) where {names, NT <: NamedTuple{names}} TT = Tuple{Any[ DataValueInterfaces.nondatavaluetype(fieldtype(NT, i)) for i = 1:fieldcount(NT) ]...} return NamedTuple{names, TT} end Base.@pure function datavaluenamedtuple(::Tables.Schema{names, types}) where {names, types} TT = Tuple{Any[ DataValueInterfaces.datavaluetype(fieldtype(types, i)) for i = 1:fieldcount(types) ]...} return NamedTuple{names, TT} end # IteratorWrapper takes an input Row iterators, it will unwrap any DataValue elements as plain Union{T, Missing} struct IteratorWrapper{S} x::S end """ Tables.nondatavaluerows(x) Takes any Queryverse-compatible `NamedTuple` iterator source and converts to a Tables.jl-compatible `AbstractRow` iterator. Will automatically unwrap any `DataValue`s, replacing `NA` with `missing`. Useful for translating Query.jl results back to non-`DataValue`-based tables. 
""" nondatavaluerows(x) = IteratorWrapper(IteratorInterfaceExtensions.getiterator(x)) Tables.istable(::Type{<:IteratorWrapper}) = true Tables.rowaccess(::Type{<:IteratorWrapper}) = true Tables.rows(x::IteratorWrapper) = x function Tables.schema(dv::IteratorWrapper{S}) where {S} eT = eltype(dv.x) (!(eT <: NamedTuple) || eT === Union{}) && return schema(dv.x) return Tables.Schema(nondatavaluenamedtuple(eT)) end Base.IteratorEltype(::Type{I}) where {S, I<:IteratorWrapper{S}} = Base.IteratorEltype(S) Base.eltype(::Type{I}) where {S, I<:IteratorWrapper{S}} = IteratorRow{eltype(S)} Base.IteratorSize(::Type{I}) where {S, I<:IteratorWrapper{S}} = Base.IteratorSize(S) Base.length(rows::IteratorWrapper) = length(rows.x) Base.size(rows::IteratorWrapper) = size(rows.x) @noinline invalidtable(::T, ::S) where {T, S} = throw(ArgumentError("'$T' iterates '$S' values, which doesn't satisfy the Tables.jl `AbstractRow` interface")) @inline function Base.iterate(rows::IteratorWrapper) x = iterate(rows.x) x === nothing && return nothing row, st = x columnnames(row) === () && invalidtable(rows.x, row) return IteratorRow(row), st end @inline function Base.iterate(rows::IteratorWrapper, st) x = iterate(rows.x, st) x === nothing && return nothing row, st = x return IteratorRow(row), st end struct IteratorRow{T} <: AbstractRow row::T end getrow(r::IteratorRow) = getfield(r, :row) wrappedtype(::Type{I}) where {T, I<:IteratorRow{T}} = T wrappedtype(::Type{T}) where {T} = T unwrap(::Type{T}, x) where {T} = convert(T, x) unwrap(::Type{Any}, x) = x.hasvalue ? x.value : missing nondv(T) = DataValueInterfaces.nondatavaluetype(T) undatavalue(x::T) where {T} = T == nondv(T) ? 
x : unwrap(nondv(T), x) @inline getcolumn(r::IteratorRow, ::Type{T}, col::Int, nm::Symbol) where {T} = undatavalue(getcolumn(getrow(r), T, col, nm)) @inline getcolumn(r::IteratorRow, nm::Symbol) = undatavalue(getcolumn(getrow(r), nm)) @inline getcolumn(r::IteratorRow, i::Int) = undatavalue(getcolumn(getrow(r), i)) columnnames(r::IteratorRow) = columnnames(getrow(r)) # DataValueRowIterator wraps a Row iterator and will wrap `Union{T, Missing}` typed fields in DataValues struct DataValueRowIterator{NT, sch, S} x::S end """ Tables.datavaluerows(x) => NamedTuple iterator Takes any table input `x` and returns a `NamedTuple` iterator that will replace missing values with `DataValue`-wrapped values; this allows any table type to satisfy the TableTraits.jl Queryverse integration interface by defining: ``` IteratorInterfaceExtensions.getiterator(x::MyTable) = Tables.datavaluerows(x) ``` """ function datavaluerows(x) r = Tables.rows(x) s = Tables.schema(r) s === nothing && error("Schemaless sources cannot be passed to datavaluerows.") return DataValueRowIterator{datavaluenamedtuple(s), typeof(s), typeof(r)}(r) end Base.eltype(::Type{D}) where {NT, D<:DataValueRowIterator{NT}} = NT Base.IteratorSize(::Type{D}) where {NT, sch, S, D<:DataValueRowIterator{NT, sch, S}} = Base.IteratorSize(S) Base.length(rows::DataValueRowIterator) = length(rows.x) Base.size(rows::DataValueRowIterator) = size(rows.x) function Base.iterate(rows::DataValueRowIterator{NamedTuple{names, dtypes}, Schema{names, rtypes}, S}, st=()) where {names, dtypes, rtypes, S} # use of @generated justified here because Queryverse has stated only support for "reasonable amount of columns" if @generated vals = Any[ :(convert($(fieldtype(dtypes, i)), getcolumn(row, $(fieldtype(rtypes, i)), $i, $(Meta.QuoteNode(names[i]))))) for i = 1:length(names) ] ret = Expr(:new, :(NamedTuple{names, dtypes}), vals...) q = quote x = iterate(rows.x, st...) 
x === nothing && return nothing row, st = x return $ret, (st,) end # @show q return q else x = iterate(rows.x, st...) x === nothing && return nothing row, st = x return NamedTuple{names, dtypes}(Tuple(convert(fieldtype(dtypes, i), getcolumn(row, fieldtype(rtypes, i), i, names[i])) for i = 1:length(names))), (st,) end end ./opt/julia/packages/Tables/NSGZI/src/matrix.jlZistable(::Type{<:AbstractMatrix}) = false # for AbstractMatrix types, we don't want to automatically treat them as tables # *unless* someone has defined their own w/ appropriate Tables definitions rows(m::T) where {T <: AbstractMatrix} = istable(m) ? _rows(m) : throw(ArgumentError("a '$T' is not a table; see `?Tables.table` for ways to treat an AbstractVecOrMat as a table")) columns(m::T) where {T <: AbstractMatrix} = istable(m) ? _columns(m) : throw(ArgumentError("a '$T' is not a table; see `?Tables.table` for ways to treat an AbstractVecOrMat as a table")) struct MatrixTable{T <: AbstractVecOrMat} <: AbstractColumns names::Vector{Symbol} lookup::Dict{Symbol, Int} matrix::T end struct MatrixRowTable{T <: AbstractVecOrMat} names::Vector{Symbol} lookup::Dict{Symbol, Int} matrix::T end const MatrixTables{T} = Union{MatrixTable{T}, MatrixRowTable{T}} names(m::MatrixTables) = getfield(m, :names) # row interface istable(::Type{<:MatrixTable}) = true rowaccess(::Type{<:MatrixTable}) = true rows(x::MatrixTable) = MatrixRowTable(getfield(x, :names), getfield(x, :lookup), getfield(x, :matrix)) struct MatrixRow{T} <: AbstractRow row::Int source::MatrixRowTable{T} end getcolumn(m::MatrixRow, ::Type, col::Int, nm::Symbol) = getfield(getfield(m, :source), :matrix)[getfield(m, :row), col] getcolumn(m::MatrixRow, i::Int) = getfield(getfield(m, :source), :matrix)[getfield(m, :row), i] getcolumn(m::MatrixRow, nm::Symbol) = getfield(getfield(m, :source), :matrix)[getfield(m, :row), getfield(getfield(m, :source), :lookup)[nm]] columnnames(m::MatrixRow) = names(getfield(m, :source)) schema(m::MatrixTables{T}) where {T} = 
Schema(Tuple(names(m)), NTuple{size(getfield(m, :matrix), 2), eltype(T)})

# row iteration: one `MatrixRow` per index along the first dimension of the wrapped matrix
Base.eltype(::Type{M}) where {T,M<:MatrixRowTable{T}} = MatrixRow{T}
Base.length(m::MatrixRowTable) = size(getfield(m, :matrix), 1)
Base.iterate(m::MatrixRowTable, st=1) = st > length(m) ? nothing : (MatrixRow(st, m), st + 1)

# column interface
columnaccess(::Type{<:MatrixTable}) = true
columns(m::MatrixTable) = m
# a column is a view of the wrapped matrix, looked up by position or (via `lookup`) by name
getcolumn(m::MatrixTable, i::Int) = view(getfield(m, :matrix), :, i)
getcolumn(m::MatrixTable, ::Type, col::Int, nm::Symbol) = getcolumn(m, col)
getcolumn(m::MatrixTable, nm::Symbol) = getcolumn(m, getfield(m, :lookup)[nm])
columnnames(m::MatrixTable) = names(m)

"""
    Tables.table(m::AbstractVecOrMat; [header])

Wrap an `AbstractVecOrMat` (`Matrix`, `Vector`, `Adjoint`, etc.) in a `MatrixTable`, which satisfies
the Tables.jl interface. (An `AbstractVector` is treated as a 1-column matrix.) This allows accessing
the matrix via `Tables.rows` and `Tables.columns`.

An optional keyword argument iterator `header` can be passed which will be converted to a
`Vector{Symbol}` to be used as the column names. An `ArgumentError` is thrown if the number of
names in `header` differs from the number of columns (`size(m, 2)`).

Note that no copy of the `AbstractVecOrMat` is made.
"""
function table(m::AbstractVecOrMat; header=[Symbol("Column$i") for i = 1:size(m, 2)])
    # reuse `header` as-is when it already is a `Vector{Symbol}`, otherwise convert each entry
    symbol_header = header isa Vector{Symbol} ? header : [Symbol(h) for h in header]
    if length(symbol_header) != size(m, 2)
        throw(ArgumentError("provided column names `header` length must match number of columns in matrix ($(size(m, 2)))"))
    end
    # name => column index, used by `getcolumn(m, nm::Symbol)`
    lookup = Dict(nm=>i for (i, nm) in enumerate(symbol_header))
    return MatrixTable(symbol_header, lookup, m)
end

"""
    Tables.matrix(table; transpose::Bool=false)

Materialize any table source input as a new `Matrix` or in the case of a `MatrixTable`
return the originally wrapped matrix. If the table column element types are not homogeneous,
they will be promoted to a common type in the materialized `Matrix`. Note that column names are
ignored in the conversion. By default, input table columns will be materialized as corresponding
matrix columns; passing `transpose=true` will transpose the input with input columns as matrix rows
or in the case of a `MatrixTable` apply `permutedims` to the originally wrapped matrix.
"""
function matrix(table; transpose::Bool=false)
    cols = Columns(table)
    types = schema(cols).types
    # `Union{}` is the identity element of `promote_type`: the result is unchanged for any
    # non-empty `types`, and an empty `types` no longer fails on reducing an empty collection
    T = reduce(promote_type, types; init=Union{})
    n, p = rowcount(cols), length(types)
    if !transpose
        mat = Matrix{T}(undef, n, p)
        for (i, col) in enumerate(cols)
            mat[:, i] .= col
        end
    else
        mat = Matrix{T}(undef, p, n)
        for (i, col) in enumerate(cols)
            mat[i, :] .= col
        end
    end
    return mat
end

function matrix(table::MatrixTable; transpose::Bool=false)
    mat = getfield(table, :matrix)
    if transpose
        return permutedims(mat)
    elseif mat isa AbstractVector
        # always return a matrix, for type stability
        return reshape(mat, :, 1)
    else
        return mat
    end
end

# implement default nrow and ncol methods for DataAPI.jl
DataAPI.nrow(table::MatrixRowTable) = length(table)
DataAPI.ncol(table::MatrixRowTable) = size(getfield(table, :matrix), 2) # this is correct even if m is a vector

# (non-code file-boundary marker carried over from the dump) -/opt/julia/packages/Tables/NSGZI/src/dicts.jl

# Dict of Vectors as table
struct DictColumnTable <: AbstractColumns
    schema::Schema
    values::OrderedDict{Symbol, AbstractVector}
end

"""
    Tables.dictcolumntable(x) => Tables.DictColumnTable

Take any Tables.jl-compatible source `x` and return a `DictColumnTable`, which
can be thought of as a `OrderedDict` mapping column names as `Symbol`s to `AbstractVector`s.
The order of the input table columns is preserved via the `Tables.schema(::DictColumnTable)`.

For "schema-less" input tables, `dictcolumntable` employs a "column unioning" behavior,
as opposed to inferring the schema from the first row like `Tables.columns`. This
means that as rows are iterated, each value from the row is joined into an aggregate
final set of columns. This is especially useful when input table rows may not include columns
if the value is missing, instead of including an actual value `missing`, which is common in json,
for example. This results in a performance cost tracking all seen values and inferring the
final unioned schemas, so it's recommended to use only when needed.
"""
function dictcolumntable(x)
    if columnaccess(x)
        # column-oriented source: reuse its columns directly
        cols = columns(x)
        names = columnnames(cols)
        sch = schema(cols)
        out = OrderedDict(nm => getcolumn(cols, nm) for nm in names)
        if sch === nothing
            # the source doesn't report a schema, but `DictColumnTable` requires one;
            # derive it from the collected columns, as the schema-less row path below does
            sch = Schema(collect(keys(out)), eltype.(values(out)))
        end
    else
        r = rows(x)
        L = Base.IteratorSize(typeof(r))
        # with an unknown length, columns start empty
        len = Base.haslength(r) ? length(r) : 0
        sch = schema(r)
        if sch !== nothing
            # known schema: allocate every column up front, keyed by column index while filling
            names, types = sch.names, sch.types
            out = OrderedDict{Int, AbstractVector}(i => allocatecolumn(types[i], len) for i = 1:length(types))
            for (i, row) in enumerate(r)
                eachcolumns(add!, sch, row, out, L, i)
            end
            # re-key the filled columns by name
            out = OrderedDict(names[k] => v for (k, v) in out)
        else
            # unknown schema: union the columns seen across all rows
            names = Symbol[]       # every column name encountered so far, in first-seen order
            seen = Set{Symbol}()   # column names present in the current row
            out = OrderedDict{Symbol, AbstractVector}()
            for (i, row) in enumerate(r)
                for nm in columnnames(row)
                    push!(seen, nm)
                    val = getcolumn(row, nm)
                    if haskey(out, nm)
                        col = out[nm]
                        if typeof(val) <: eltype(col)
                            add!(val, 0, nm, col, L, i)
                        else
                            # widen column type
                            new = allocatecolumn(promote_type(eltype(col), typeof(val)), length(col))
                            i > 1 && copyto!(new, 1, col, 1, i - 1)
                            add!(new, val, L, i)
                            out[nm] = new
                        end
                    else
                        push!(names, nm)
                        if i == 1
                            new = allocatecolumn(typeof(val), len)
                            add!(new, val, L, i)
                            out[nm] = new
                        else
                            # first seen after row 1, so the column must also admit `missing`
                            new = allocatecolumn(Union{Missing, typeof(val)}, len)
                            add!(new, val, L, i)
                            out[nm] = new
                        end
                    end
                end
                # known columns that this row did not provide must be able to hold `missing`
                # NOTE(review): nothing is stored at row `i` for such a column; this presumably
                # relies on `allocatecolumn` storage reading as `missing` when unset — confirm,
                # in particular for non-isbits element types and for sources of unknown length
                for nm in names
                    if !(nm in seen)
                        col = out[nm]
                        if !(eltype(col) >: Missing)
                            new = allocatecolumn(Union{Missing, eltype(col)}, len)
                            i > 1 && copyto!(new, 1, col, 1, i - 1)
                            out[nm] = new
                        end
                    end
                end
                empty!(seen)
            end
            sch = Schema(collect(keys(out)), eltype.(values(out)))
        end
    end
    return DictColumnTable(sch, out)
end

istable(::Type{DictColumnTable}) = true
columnaccess(::Type{DictColumnTable}) = true
columns(x::DictColumnTable) = x
schema(x::DictColumnTable) = getfield(x, :schema)
columnnames(x::DictColumnTable) = getfield(x, :schema).names
# positional access goes through the schema's names, so it follows schema order
getcolumn(x::DictColumnTable, i::Int) = getfield(x, :values)[columnnames(x)[i]]
getcolumn(x::DictColumnTable, nm::Symbol) = getfield(x, :values)[nm]

# Vector of Dicts as table
struct DictRowTable
    names::Vector{Symbol}
    types::Dict{Symbol, Type}
    values::Vector{Dict{Symbol, Any}}
end

isrowtable(::Type{DictRowTable}) = true
# column order comes from `names`; `types` is only a name => type lookup
schema(x::DictRowTable) = Schema(getfield(x, :names), [getfield(x, :types)[nm] for nm in getfield(x, :names)])

struct DictRow <: AbstractRow
    names::Vector{Symbol}
    row::Dict{Symbol, Any}
end

columnnames(x::DictRow) = getfield(x, :names)
# a column that is absent from this row's dict reads as `missing`
getcolumn(x::DictRow, i::Int) = get(getfield(x, :row), columnnames(x)[i], missing)
getcolumn(x::DictRow, nm::Symbol) = get(getfield(x, :row), nm, missing)

Base.IteratorSize(::Type{DictRowTable}) = Base.HasLength()
Base.length(x::DictRowTable) = length(getfield(x, :values))
Base.IteratorEltype(::Type{DictRowTable}) = Base.HasEltype()
Base.eltype(::Type{DictRowTable}) = DictRow

# Iterate the table's rows; each row dict is wrapped in a `DictRow` sharing the table's names.
function Base.iterate(x::DictRowTable, st=1)
    st > length(x) && return nothing
    return DictRow(getfield(x, :names), getfield(x, :values)[st]), st + 1
end

# `Tables.subset` for `DictRowTable`: an `Integer` `inds` returns a single `DictRow`, any other
# `inds` must select an `AbstractVector` of rows and returns a `DictRowTable` (otherwise an
# `ArgumentError` is thrown). `view` is the deprecated spelling of `viewhint`.
function subset(x::DictRowTable, inds; viewhint::Union{Bool,Nothing}=nothing, view::Union{Bool,Nothing}=nothing)
    if view !== nothing
        @warn "`view` keyword argument is deprecated for `Tables.subset`, use `viewhint` instead"
        viewhint = view
    end
    rowdicts = getfield(x, :values)
    if inds isa Integer
        # a single row is always taken by plain indexing: `view(v, i)` is a 0-dimensional
        # array, which cannot be stored in the `row` field of a `DictRow`
        return DictRow(getfield(x, :names), rowdicts[inds])
    end
    # the keyword `view` shadows the function, hence the qualified `Base.view`
    values = viewhint === true ? Base.view(rowdicts, inds) : rowdicts[inds]
    values isa AbstractVector || throw(ArgumentError("`Tables.subset`: invalid `inds` argument, expected `RowTable` output, got $(typeof(values))"))
    return DictRowTable(getfield(x, :names), getfield(x, :types), values)
end

"""
    Tables.dictrowtable(x) => Tables.DictRowTable

Take any Tables.jl-compatible source `x` and return a `DictRowTable`, which
can be thought of as a `Vector` of `OrderedDict` rows mapping column names as `Symbol`s to values.
The order of the input table columns is preserved via the `Tables.schema(::DictRowTable)`.

For "schema-less" input tables, `dictrowtable` employs a "column unioning" behavior,
as opposed to inferring the schema from the first row like `Tables.columns`. This
means that as rows are iterated, each value from the row is joined into an aggregate
final set of columns. This is especially useful when input table rows may not include columns
if the value is missing, instead of including an actual value `missing`, which is common in json,
for example. This results in a performance cost tracking all seen values and inferring the
final unioned schemas, so it's recommended to use only when the union behavior is needed.
"""
function dictrowtable(x)
    names = Symbol[]                       # column names in first-seen order
    seen = Set{Symbol}()                   # same names, for fast membership tests
    types = OrderedDict{Symbol, Type}()    # running (unioned) type of each column
    r = rows(x)
    L = Base.IteratorSize(typeof(r))
    out = Vector{OrderedDict{Symbol, Any}}(undef, Base.haslength(r) ? length(r) : 0)
    for (i, drow) in enumerate(r)
        row = OrderedDict{Symbol, Any}(nm => getcolumn(drow, nm) for nm in columnnames(drow))
        add!(row, 0, :_, out, L, i)
        if i == 1
            # the first row seeds names and types as-is; testing `i` rather than
            # `isempty(names)` keeps a column-less first row from making a later row
            # look like the first one, which would drop `Missing` from its column types
            for (k, v) in row
                push!(names, k)
                types[k] = typeof(v)
            end
            union!(seen, names)
        else
            for nm in names
                if haskey(row, nm)
                    T = types[nm]
                    v = row[nm]
                    if !(typeof(v) <: T)
                        types[nm] = Union{T, typeof(v)}
                    end
                else
                    # known column absent from this row
                    types[nm] = Union{Missing, types[nm]}
                end
            end
            for (k, v) in row
                if !(k in seen)
                    push!(seen, k)
                    push!(names, k)
                    # we mark the type as Union{T, Missing} here because
                    # we're at least on the 2nd row, and we didn't see
                    # this column in the 1st row, so its value will be
                    # `missing` for that row
                    types[k] = Union{typeof(v), Missing}
                end
            end
        end
    end
    # NOTE(review): `types` and `out` are built with `OrderedDict`s while the `DictRowTable`
    # fields are declared with `Dict`; presumably the constructor converts them — confirm
    return DictRowTable(names, types, out)
end

# implement default nrow and ncol methods for DataAPI.jl
DataAPI.nrow(table::DictRowTable) = length(table)
DataAPI.ncol(table::DictRowTable) = length(getfield(table, :names))