//===========================================================================
/*!
*
*
* \brief Weighted data sets for (un-)supervised learning.
*
*
* \par
* This file provides containers for data used by the models, loss
* functions, and learning algorithms (trainers). The reason for
* dedicated containers of this type is that data often need to be
* split into subsets, such as training and test data, or folds in
* cross-validation. The containers in this file provide memory
* efficient mechanisms for managing and providing such subsets.
* The speciality of these containers are that they are weighted.
*
*
*
* \author O. Krause
* \date 2014
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see .
*
*/
//===========================================================================
#ifndef SHARK_DATA_WEIGHTED_DATASET_H
#define SHARK_DATA_WEIGHTED_DATASET_H
#include
namespace shark {
///\brief Input-Label pair of data
template
struct WeightedDataPair{
DataType data;
WeightType weight;
WeightedDataPair(){}
template
WeightedDataPair(
DataT&& data,
WeightT&& weight
):data(data),weight(weight){}
template
WeightedDataPair(
WeightedDataPair const& pair
):data(pair.data),weight(pair.weight){}
template
WeightedDataPair& operator=(WeightedDataPair const& batch){
data = batch.data;
weight = batch.weight;
return *this;
}
WeightedDataPair& operator=(WeightedDataPair const& batch){
data = batch.data;
weight = batch.weight;
return *this;
}
};
template
void swap(WeightedDataPair&& p1, WeightedDataPair&& p2){
using std::swap;
swap(std::forward(p1.data),std::forward(p2.data));
swap(std::forward(p1.weight),std::forward(p2.weight));
}
template
struct WeightedDataBatch{
private:
typedef typename BatchTraits::type >::type DataBatchTraits;
typedef typename BatchTraits::type >::type WeightBatchTraits;
public:
DataBatchType data;
WeightBatchType weight;
typedef WeightedDataPair<
typename DataBatchTraits::value_type,
typename WeightBatchTraits::value_type
> value_type;
typedef WeightedDataPair<
decltype(getBatchElement(std::declval(),0)),
decltype(getBatchElement(std::declval(),0))
> reference;
typedef WeightedDataPair<
decltype(getBatchElement(std::declval::type&>(),0)),
decltype(getBatchElement(std::declval::type&>(),0))
> const_reference;
typedef IndexingIterator iterator;
typedef IndexingIterator const_iterator;
template
WeightedDataBatch(
D&& data,
W&& weight
):data(data),weight(weight){}
template
WeightedDataBatch(
std::size_t size,Pair const& p
):data(DataBatchTraits::createBatch(p.data,size)),weight(WeightBatchTraits::createBatch(p.weight,size)){}
template
WeightedDataBatch& operator=(WeightedDataBatch const& batch){
data = batch.data;
weight = batch.weight;
return *this;
}
std::size_t size()const{
return DataBatchTraits::size(data);
}
iterator begin(){
return iterator(*this,0);
}
const_iterator begin()const{
return const_iterator(*this,0);
}
iterator end(){
return iterator(*this,size());
}
const_iterator end()const{
return const_iterator(*this,size());
}
reference operator[](std::size_t i){
return reference(getBatchElement(data,i),getBatchElement(weight,i));
}
const_reference operator[](std::size_t i)const{
return const_reference(getBatchElement(data,i),getBatchElement(weight,i));
}
};
template
void swap(WeightedDataBatch& p1, WeightedDataBatch& p2){
using std::swap;
swap(p1.data,p2.data);
swap(p1.weight,p2.weight);
}
template
struct Batch >
: public detail::SimpleBatch<
WeightedDataBatch::type, typename detail::element_to_batch::type>
>{};
template
struct BatchTraits >{
typedef typename detail::batch_to_element::type DataElem;
typedef typename detail::batch_to_element::type WeightElem;
typedef Batch > type;
};
namespace detail{
template
class BaseWeightedDataset : public ISerializable
{
public:
typedef typename DataContainerT::element_type DataType;
typedef double WeightType;
typedef DataContainerT DataContainer;
typedef Data WeightContainer;
typedef typename DataContainer::IndexSet IndexSet;
// TYPEDEFS FOR PAIRS
typedef WeightedDataPair<
DataType,
WeightType
> element_type;
typedef WeightedDataBatch<
typename DataContainer::batch_type,
typename WeightContainer::batch_type
> batch_type;
// TYPEDEFS FOR BATCH REFERENCES
typedef WeightedDataBatch<
typename DataContainer::batch_reference,
typename WeightContainer::batch_reference
> batch_reference;
typedef WeightedDataBatch<
typename DataContainer::const_batch_reference,
typename WeightContainer::const_batch_reference
> const_batch_reference;
typedef typename Batch::reference element_reference;
typedef typename Batch::const_reference const_element_reference;
typedef boost::iterator_range< detail::DataElementIterator > > element_range;
typedef boost::iterator_range< detail::DataElementIterator const> > const_element_range;
typedef detail::BatchRange > batch_range;
typedef detail::BatchRange const> const_batch_range;
///\brief Returns the range of elements.
///
///It is compatible to boost::range and STL and can be used whenever an algorithm requires
///element access via begin()/end() in which case data.elements() provides the correct interface
const_element_range elements()const{
return const_element_range(
detail::DataElementIterator const>(this,0,0,0),
detail::DataElementIterator const>(this,numberOfBatches(),0,numberOfElements())
);
}
///\brief Returns therange of elements.
///
///It is compatible to boost::range and STL and can be used whenever an algorithm requires
///element access via begin()/end() in which case data.elements() provides the correct interface
element_range elements(){
return element_range(
detail::DataElementIterator >(this,0,0,0),
detail::DataElementIterator >(this,numberOfBatches(),0,numberOfElements())
);
}
///\brief Returns the range of batches.
///
///It is compatible to boost::range and STL and can be used whenever an algorithm requires
///element access via begin()/end() in which case data.elements() provides the correct interface
const_batch_range batches()const{
return const_batch_range(this);
}
///\brief Returns the range of batches.
///
///It is compatible to boost::range and STL and can be used whenever an algorithm requires
///element access via begin()/end() in which case data.elements() provides the correct interface
batch_range batches(){
return batch_range(this);
}
///\brief Returns the number of batches of the set.
std::size_t numberOfBatches() const{
return m_data.numberOfBatches();
}
///\brief Returns the total number of elements.
std::size_t numberOfElements() const{
return m_data.numberOfElements();
}
///\brief Check whether the set is empty.
bool empty() const{
return m_data.empty();
}
///\brief Access to the stored data points as a separate container.
DataContainer const& data() const{
return m_data;
}
///\brief Access to the stored data points as a separate container.
DataContainer& data(){
return m_data;
}
///\brief Access to weights as a separate container.
WeightContainer const& weights() const{
return m_weights;
}
///\brief Access to weights as a separate container.
WeightContainer& weights(){
return m_weights;
}
// CONSTRUCTORS
///\brief Constructs an Empty data set.
BaseWeightedDataset()
{}
///\brief Create an empty set with just the correct number of batches.
///
/// The user must initialize the dataset after that by himself.
BaseWeightedDataset(std::size_t numBatches)
: m_data(numBatches),m_weights(numBatches)
{}
/// \brief Construtor using a single element as blueprint to create a dataset with a specified number of elements.
///
/// Optionally the desired batch Size can be set
///
///@param size the new size of the container
///@param element the blueprint element from which to create the Container
///@param batchSize the size of the batches. if this is 0, the size is unlimited
BaseWeightedDataset(std::size_t size, element_type const& element, std::size_t batchSize)
: m_data(size,element.data,batchSize)
, m_weights(size,element.weight,batchSize)
{}
///\brief Construction from data and a dataset rpresnting the weights
///
/// Beware that when calling this constructor the organization of batches must be equal in both
/// containers. This Constructor will not reorganize the data!
BaseWeightedDataset(DataContainer const& data, Data const& weights)
: m_data(data), m_weights(weights)
{
SHARK_RUNTIME_CHECK(data.numberOfElements() == weights.numberOfElements(), "[ BaseWeightedDataset::WeightedUnlabeledData] number of data and number of weights must agree");
#ifndef DNDEBUG
for(std::size_t i = 0; i != data.numberOfBatches(); ++i){
SIZE_CHECK(batchSize(data.batch(i)) == batchSize(weights.batch(i)));
}
#endif
}
///\brief Construction from data. All points get the same weight assigned
BaseWeightedDataset(DataContainer const& data, double weight)
: m_data(data), m_weights(data.numberOfBatches())
{
for(std::size_t i = 0; i != numberOfBatches(); ++i){
m_weights.batch(i) = Batch::type(batchSize(m_data.batch(i)),weight);
}
}
// ELEMENT ACCESS
element_reference element(std::size_t i){
return *(detail::DataElementIterator >(this,0,0,0)+i);
}
const_element_reference element(std::size_t i) const{
return *(detail::DataElementIterator const>(this,0,0,0)+i);
}
// BATCH ACCESS
batch_reference batch(std::size_t i){
return batch_reference(m_data.batch(i),m_weights.batch(i));
}
const_batch_reference batch(std::size_t i) const{
return const_batch_reference(m_data.batch(i),m_weights.batch(i));
}
// MISC
/// from ISerializable
void read(InArchive& archive){
archive & m_data;
archive & m_weights;
}
/// from ISerializable
void write(OutArchive& archive) const{
archive & m_data;
archive & m_weights;
}
///\brief This method makes the vector independent of all siblings and parents.
virtual void makeIndependent(){
m_weights.makeIndependent();
m_data.makeIndependent();
}
///\brief shuffles all elements in the entire dataset (that is, also across the batches)
virtual void shuffle(){
shark::shuffle(this->elements().begin(),this->elements().end(), random::globalRng);
}
void splitBatch(std::size_t batch, std::size_t elementIndex){
m_data.splitBatch(batch,elementIndex);
m_weights.splitBatch(batch,elementIndex);
}
/// \brief Appends the contents of another data object to the end
///
/// The batches are not copied but now referenced from both datasets. Thus changing the appended
/// dataset might change this one as well.
void append(BaseWeightedDataset const& other){
m_data.append(other.m_data);
m_weights.append(other.m_weights);
}
///\brief Reorders the batch structure in the container to that indicated by the batchSizes vector
///
///After the operation the container will contain batchSizes.size() batches with the i-th batch having size batchSize[i].
///However the sum of all batch sizes must be equal to the current number of elements
template
void repartition(Range const& batchSizes){
m_data.repartition(batchSizes);
m_weights.repartition(batchSizes);
}
/// \brief Creates a vector with the batch sizes of every batch.
///
/// This method can be used together with repartition to ensure
/// that two datasets have the same batch structure.
std::vector getPartitioning()const{
return m_data.getPartitioning();
}
friend void swap( BaseWeightedDataset& a, BaseWeightedDataset& b){
swap(a.m_data,b.m_data);
swap(a.m_weights,b.m_weights);
}
// SUBSETS
///\brief Fill in the subset defined by the list of indices.
BaseWeightedDataset indexedSubset(IndexSet const& indices) const{
BaseWeightedDataset subset;
subset.m_data = m_data.indexedSubset(indices);
subset.m_weights = m_weights.indexedSubset(indices);
return subset;
}
private:
DataContainer m_data; /// point data
WeightContainer m_weights; /// weight data
};
}
///
/// \brief Weighted data set for unsupervised learning
///
/// The WeightedUnlabeledData class extends UnlabeledData for the
/// representation of data. In addition it holds and provides access to the corresponding weights.
///
/// WeightedUnlabeledData tries to mimic the underlying data as pairs of data points and weights.
/// this means that when accessing a batch by calling batch(i) or choosing one of the iterators
/// one access the input batch by batch(i).data and the weights by batch(i).weight
///
///this also holds true for single element access using operator(). Be aware, that direct access to element is
///a linear time operation. So it is not advisable to iterate over the elements, but instead iterate over the batches.
template
class WeightedUnlabeledData : public detail::BaseWeightedDataset >
{
private:
typedef detail::BaseWeightedDataset > base_type;
public:
using base_type::data;
using base_type::weights;
typedef typename base_type::DataType DataType;
typedef typename base_type::WeightType WeightType;
typedef typename base_type::element_type element_type;
typedef DataT InputType;
BOOST_STATIC_CONSTANT(std::size_t, DefaultBatchSize = UnlabeledData::DefaultBatchSize);
// CONSTRUCTORS
///\brief Empty data set.
WeightedUnlabeledData()
{}
///\brief Create an empty set with just the correct number of batches.
///
/// The user must initialize the dataset after that by himself.
WeightedUnlabeledData(std::size_t numBatches)
: base_type(numBatches)
{}
/// \brief Construtor using a single element as blueprint to create a dataset with a specified number of elements.
///
/// Optionally the desired batch Size can be set
///
///@param size the new size of the container
///@param element the blueprint element from which to create the Container
///@param batchSize the size of the batches. if this is 0, the size is unlimited
WeightedUnlabeledData(std::size_t size, element_type const& element, std::size_t batchSize = DefaultBatchSize)
: base_type(size,element,batchSize){}
///\brief Construction from data.
///
/// Beware that when calling this constructor the organization of batches must be equal in both
/// containers. This Constructor will not reorganize the data!
WeightedUnlabeledData(UnlabeledData const& data, Data const& weights)
: base_type(data,weights)
{}
///\brief Construction from data and a constant weight for all elements
WeightedUnlabeledData(UnlabeledData const& data, double weight)
: base_type(data,weight)
{}
//we additionally add the two below for compatibility with UnlabeledData
///\brief Access to the inputs as a separate container.
UnlabeledData const& inputs() const{
return data();
}
///\brief Access to the inputs as a separate container.
UnlabeledData& inputs(){
return data();
}
///\brief Returns the Shape of the data.
Shape const& shape() const{
return data().shape();
}
///\brief Returns the Shape of the data.
Shape& shape(){
return data().shape();
}
///\brief Splits the container into two independent parts. The left part remains in the container, the right is stored as return type
///
///Order of elements remain unchanged. The SharedVector is not allowed to be shared for
///this to work.
WeightedUnlabeledData splice(std::size_t batch){
return WeightedUnlabeledData(data().splice(batch),weights().splice(batch));
}
friend void swap(WeightedUnlabeledData& a, WeightedUnlabeledData& b){
swap(static_cast(a),static_cast(b));
}
};
///brief Outstream of elements for weighted data.
template
std::ostream &operator << (std::ostream &stream, const WeightedUnlabeledData& d) {
for(auto elem: d.elements())
stream << elem.weight << " [" << elem.data<<"]"<< "\n";
return stream;
}
/// \brief creates a weighted unweighted data object from two ranges, representing data and weights
template
typename boost::disable_if<
boost::is_arithmetic,
WeightedUnlabeledData<
typename boost::range_value::type
>
>::type createUnlabeledDataFromRange(DataRange const& data, WeightRange const& weights, std::size_t batchSize = 0){
SHARK_RUNTIME_CHECK(batchSize(data) == batchSize(weights),"Number of datapoints and number of weights must agree");
typedef typename boost::range_value::type Data;
if (batchSize == 0)
batchSize = WeightedUnlabeledData::DefaultBatchSize;
return WeightedUnlabeledData(
shark::createUnlabeledDataFromRange(data,batchSize),
createDataFromRange(weights,batchSize)
);
}
///
/// \brief Weighted data set for supervised learning
///
/// The WeightedLabeledData class extends LabeledData for the
/// representation of data. In addition it holds and provides access to the corresponding weights.
///
/// WeightedLabeledData tries to mimic the underlying data as pairs of data tuples(input,label) and weights.
/// this means that when accessing a batch by calling batch(i) or choosing one of the iterators
/// one access the databatch by batch(i).data and the weights by batch(i).weight. to access the points and labels
/// use batch(i).data.input and batch(i).data.label
///
///this also holds true for single element access using operator(). Be aware, that direct access to element is
///a linear time operation. So it is not advisable to iterate over the elements, but instead iterate over the batches.
///
/// It is possible to gains everal views on the set. one can either get access to inputs, labels and weights separately
/// or gain access to the unweighted dataset of inputs and labels. Additionally the sets support on-the-fly creation
/// of the (inputs,weights) subset for unsupervised weighted learning
template
class WeightedLabeledData : public detail::BaseWeightedDataset >
{
private:
typedef detail::BaseWeightedDataset > base_type;
public:
typedef typename base_type::DataType DataType;
typedef typename base_type::WeightType WeightType;
typedef InputT InputType;
typedef LabelT LabelType;
typedef typename base_type::element_type element_type;
using base_type::data;
using base_type::weights;
BOOST_STATIC_CONSTANT(std::size_t, DefaultBatchSize = (LabeledData::DefaultBatchSize));
// CONSTRUCTORS
///\brief Empty data set.
WeightedLabeledData()
{}
///\brief Create an empty set with just the correct number of batches.
///
/// The user must initialize the dataset after that by himself.
WeightedLabeledData(std::size_t numBatches)
: base_type(numBatches)
{}
/// \brief Construtor using a single element as blueprint to create a dataset with a specified number of elements.
///
/// Optionally the desired batch Size can be set
///
///@param size the new size of the container
///@param element the blueprint element from which to create the Container
///@param batchSize the size of the batches. if this is 0, the size is unlimited
WeightedLabeledData(std::size_t size, element_type const& element, std::size_t batchSize = DefaultBatchSize)
: base_type(size,element,batchSize){}
///\brief Construction from data.
///
/// Beware that when calling this constructor the organization of batches must be equal in both
/// containers. This Constructor will not reorganize the data!
WeightedLabeledData(LabeledData const& data, Data const& weights)
: base_type(data,weights)
{}
///\brief Construction from data and a constant weight for all elements
WeightedLabeledData(LabeledData const& data, double weight)
: base_type(data,weight)
{}
///\brief Access to the inputs as a separate container.
UnlabeledData const& inputs() const{
return data().inputs();
}
///\brief Access to the inputs as a separate container.
UnlabeledData& inputs(){
return data().inputs();
}
///\brief Access to the labels as a separate container.
Data const& labels() const{
return data().labels();
}
///\brief Access to the labels as a separate container.
Data& labels(){
return data().labels();
}
///\brief Returns the Shape of the inputs.
Shape const& inputShape() const{
return inputs().shape();
}
///\brief Returns the Shape of the inputs.
Shape& inputShape(){
return inputs().shape();
}
///\brief Returns the Shape of the labels.
Shape const& labelShape() const{
return labels().shape();
}
///\brief Returns the Shape of the labels.
Shape& labelShape(){
return labels().shape();
}
/// \brief Constructs an WeightedUnlabeledData object for the inputs.
WeightedUnlabeledData weightedInputs() const{
return WeightedUnlabeledData(data().inputs(),weights());
}
///\brief Splits the container into two independent parts. The left part remains in the container, the right is stored as return type
///
///Order of elements remain unchanged. The SharedVector is not allowed to be shared for
///this to work.
WeightedLabeledData splice(std::size_t batch){
return WeightedLabeledData(data().splice(batch),weights().splice(batch));
}
friend void swap(WeightedLabeledData& a, WeightedLabeledData& b){
swap(static_cast(a),static_cast(b));
}
};
///brief Outstream of elements for weighted labeled data.
template
std::ostream &operator << (std::ostream &stream, const WeightedLabeledData& d) {
for(auto elem: d.elements())
stream << elem.weight <<" ("<< elem.data.label << " [" << elem.data.input<<"] )"<< "\n";
return stream;
}
//Stuff for Dimensionality and querying of basic information
inline std::size_t numberOfClasses(WeightedUnlabeledData const& labels){
return numberOfClasses(labels.data());
}
///\brief Returns the number of members of each class in the dataset.
inline std::vector classSizes(WeightedUnlabeledData const& labels){
return classSizes(labels.data());
}
///\brief Return the dimnsionality of points of a weighted dataset
template
std::size_t dataDimension(WeightedUnlabeledData const& dataset){
return dataDimension(dataset.data());
}
///\brief Return the input dimensionality of a weighted labeled dataset.
template
std::size_t inputDimension(WeightedLabeledData const& dataset){
return dataDimension(dataset.inputs());
}
///\brief Return the label/output dimensionality of a labeled dataset.
template
std::size_t labelDimension(WeightedLabeledData const& dataset){
return dataDimension(dataset.labels());
}
///\brief Return the number of classes (highest label value +1) of a classification dataset with unsigned int label encoding
template
std::size_t numberOfClasses(WeightedLabeledData const& dataset){
return numberOfClasses(dataset.labels());
}
///\brief Returns the number of members of each class in the dataset.
template
inline std::vector classSizes(WeightedLabeledData const& dataset){
return classSizes(dataset.labels());
}
///\brief Returns the total sum of weights.
template
double sumOfWeights(WeightedUnlabeledData const& dataset){
double weightSum = 0;
for(std::size_t i = 0; i != dataset.numberOfBatches(); ++i){
weightSum += sum(dataset.batch(i).weight);
}
return weightSum;
}
///\brief Returns the total sum of weights.
template
double sumOfWeights(WeightedLabeledData const& dataset){
double weightSum = 0;
for(std::size_t i = 0; i != dataset.numberOfBatches(); ++i){
weightSum += sum(dataset.batch(i).weight);
}
return weightSum;
}
/// \brief Computes the cumulative weight of every class.
template
RealVector classWeight(WeightedLabeledData const& dataset){
RealVector weights(numberOfClasses(dataset),0.0);
for(auto const& elem: dataset.elements()){
weights(elem.data.label) += elem.weight;
}
return weights;
}
//creation of weighted datasets
/// \brief creates a weighted unweighted data object from two ranges, representing data and weights
template
typename boost::disable_if<
boost::is_arithmetic,
WeightedLabeledData<
typename boost::range_value::type,
typename boost::range_value::type
>
>::type createLabeledDataFromRange(InputRange const& inputs, LabelRange const& labels, WeightRange const& weights, std::size_t batchSize = 0){
SHARK_RUNTIME_CHECK(batchSize(inputs) == batchSize(labels),
"number of inputs and number of labels must agree");
SHARK_RUNTIME_CHECK(batchSize(inputs) == batchSize(weights),
"number of data points and number of weights must agree");
typedef typename boost::range_value::type InputType;
typedef typename boost::range_value::type LabelType;
if (batchSize == 0)
batchSize = WeightedLabeledData::DefaultBatchSize;
return WeightedLabeledData(
createLabeledDataFromRange(inputs,labels,batchSize),
createDataFromRange(weights,batchSize)
);
}
/// \brief Creates a bootstrap partition of a labeled dataset and returns it using weighting.
///
/// Bootstrapping resamples the dataset by drawing a set of points with
/// replacement. Thus the sampled set will contain some points multiple times
/// and some points not at all. Bootstrapping is usefull to obtain unbiased
/// measurements of the mean and variance of an estimator.
///
/// Optionally the size of the bootstrap (that is, the number of sampled points)
/// can be set. By default it is 0, which indicates that it is the same size as the original dataset.
template
WeightedLabeledData< InputType, LabelType> bootstrap(
LabeledData const& dataset,
std::size_t bootStrapSize = 0
){
if(bootStrapSize == 0)
bootStrapSize = dataset.numberOfElements();
WeightedLabeledData bootstrapSet(dataset,0.0);
for(std::size_t i = 0; i != bootStrapSize; ++i){
std::size_t index = random::discrete(random::globalRng, std::size_t(0),bootStrapSize-1);
bootstrapSet.element(index).weight += 1.0;
}
bootstrapSet.inputShape() = dataset.inputShape();
bootstrapSet.labelShape() = dataset.labelShape();
return bootstrapSet;
}
/// \brief Creates a bootstrap partition of an unlabeled dataset and returns it using weighting.
///
/// Bootstrapping resamples the dataset by drawing a set of points with
/// replacement. Thus the sampled set will contain some points multiple times
/// and some points not at all. Bootstrapping is usefull to obtain unbiased
/// measurements of the mean and variance of an estimator.
///
/// Optionally the size of the bootstrap (that is, the number of sampled points)
/// can be set. By default it is 0, which indicates that it is the same size as the original dataset.
template
WeightedUnlabeledData bootstrap(
UnlabeledData const& dataset,
std::size_t bootStrapSize = 0
){
if(bootStrapSize == 0)
bootStrapSize = dataset.numberOfElements();
WeightedUnlabeledData bootstrapSet(dataset,0.0);
for(std::size_t i = 0; i != bootStrapSize; ++i){
std::size_t index = random::discrete(random::globalRng, std::size_t(0),bootStrapSize-1);
bootstrapSet.element(index).weight += 1.0;
}
bootstrapSet.shape() = dataset.shape();
return bootstrapSet;
}
}
#endif