//===========================================================================
/*!
*
*
* \brief Support for importing and exporting data from and to sparse data (libSVM) formatted data files
*
*
* \par
* The most important application of the methods provided in this
* file is the import of data from LIBSVM files to Shark Data containers.
*
*
*
*
* \author M. Tuma, T. Glasmachers, C. Igel
* \date 2010-2016
*
*
* \par Copyright 1995-2017 Shark Development Team
*
*
* This file is part of Shark.
*
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_DATA_SPARSEDATA_H
#define SHARK_DATA_SPARSEDATA_H
#include
#include
#include
#include
namespace shark {
/**
* \ingroup shark_globals
*
* @{
*/
/// \brief Import classification data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second stream-based importSparseData declaration listed under the classification import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import regression data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second stream-based importSparseData declaration listed under the regression import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import classification data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// NOTE(review): the brief and the visible signature repeat an earlier stream-based
/// classification declaration in this header; presumably this overload targets a
/// different LabeledData instantiation whose template arguments are not visible - confirm.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second stream-based importSparseData declaration listed under the classification import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import regression data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// NOTE(review): the brief and the visible signature repeat an earlier stream-based
/// regression declaration in this header; presumably this overload targets a
/// different LabeledData instantiation whose template arguments are not visible - confirm.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second stream-based importSparseData declaration listed under the regression import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param stream stream to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::istream& stream,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import classification data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param fn the file to be read from (std::string, passed by value)
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second file-based importSparseData declaration listed under the classification import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param fn the file to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import regression data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param fn the file to be read from (std::string, passed by value)
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second file-based importSparseData declaration listed under the regression import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param fn the file to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import classification data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// NOTE(review): the brief and the visible signature repeat an earlier file-based
/// classification declaration in this header; presumably this overload targets a
/// different LabeledData instantiation whose template arguments are not visible - confirm.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param fn the file to be read from (std::string, passed by value)
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second file-based importSparseData declaration listed under the classification import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param fn the file to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Import regression data from a sparse data (libSVM) file.
///
/// Declaration only; no definition of this function is visible in this header.
///
/// NOTE(review): the brief and the visible signature repeat an earlier file-based
/// regression declaration in this header; presumably this overload targets a
/// different LabeledData instantiation whose template arguments are not visible - confirm.
///
/// \param dataset container storing the loaded data (taken by non-const reference and filled by the call)
/// \param fn the file to be read from (std::string, passed by value)
/// \param highestIndex highest feature index, or 0 for auto-detection (default: 0)
/// \param batchSize size of batch (default: LabeledData::DefaultBatchSize)
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Second file-based importSparseData declaration listed under the regression import above.
///
/// NOTE(review): as it appears here this declaration is token-for-token identical to the
/// preceding one; the template arguments of LabeledData that would tell the two overloads
/// apart are not visible - confirm the intended input and label types.
///
/// \param dataset container storing the loaded data
/// \param fn the file to be read from
/// \param highestIndex highest feature index, or 0 for auto-detection
/// \param batchSize size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
LabeledData& dataset,
std::string fn,
unsigned int highestIndex = 0,
std::size_t batchSize = LabeledData::DefaultBatchSize
);
/// \brief Export classification data to sparse data (libSVM) format.
///
/// \param dataset Container storing the data
/// \param stream Output stream
/// \param oneMinusOne Flag for applying the transformation y<-2y-1 to binary labels;
///        it is reset to false whenever numberOfClasses(dataset) is not 2
/// \param sortLabels Flag for sorting data points according to labels
template
void exportSparseData(LabeledData const& dataset, std::ostream& stream, bool oneMinusOne = true, bool sortLabels = false)
{
// The y<-2y-1 transformation is only kept for two-class data; otherwise the flag is cleared.
if (numberOfClasses(dataset) != 2) oneMinusOne = false;
// NOTE(review): presumably holds the (key, value) entries used to order the points when
// sortLabels is set - the code that fills and reads it is not visible here; confirm.
std::vector< KeyValuePair > > order;
// NOTE(review): the loop below is cut off in this copy of the file (condition, increment
// and body are missing); restore it from the upstream header before building.
for (std::size_t b=0; b
/// \brief Export labeled data to a file in sparse data (libSVM) format.
///
/// Opens \p fn for writing and forwards to the stream-based exportSparseData
/// overload that takes the oneMinusOne and sortLabels flags.
///
/// \param dataset Container storing the data
/// \param fn Name of the file to write to
/// \param oneMinusOne Passed through unchanged to the stream-based overload
/// \param sortLabels Passed through unchanged to the stream-based overload
/// \param append If true the file is opened in append mode, otherwise in plain output mode
void exportSparseData(LabeledData const& dataset, const std::string &fn, bool oneMinusOne = true, bool sortLabels = false, bool append = false)
{
	// Decide once whether existing file contents are kept (append) or overwritten.
	std::fstream::openmode mode = std::fstream::out;
	if (append) {
		mode |= std::fstream::app;
	}

	std::ofstream ofs;
	ofs.open(fn.c_str(), mode);

	// The stream must be usable before any data is written.
	SHARK_RUNTIME_CHECK(ofs, "File can not be opened for writing");
	exportSparseData(dataset, ofs, oneMinusOne, sortLabels);
}
/// \brief Export regression data to sparse data (libSVM) format.
///
/// \param dataset Container storing the data
/// \param stream Output stream
template
void exportSparseData(LabeledData const& dataset, std::ostream& stream)
{
// NOTE(review): the loop below is cut off in this copy of the file (condition, increment
// and body are missing); restore it from the upstream header before building.
for (std::size_t b=0; b
/// \brief Export labeled data to a file in sparse data (libSVM) format.
///
/// Opens \p fn for writing and forwards to the stream-based
/// exportSparseData(dataset, stream) overload.
///
/// \param dataset Container storing the data
/// \param fn Name of the file to write to
/// \param append If true the file is opened in append mode, otherwise in plain output mode
void exportSparseData(LabeledData const& dataset, const std::string &fn, bool append = false)
{
	// Decide once whether existing file contents are kept (append) or overwritten.
	std::fstream::openmode mode = std::fstream::out;
	if (append) {
		mode |= std::fstream::app;
	}

	std::ofstream ofs;
	ofs.open(fn.c_str(), mode);

	// The stream must be usable before any data is written.
	SHARK_RUNTIME_CHECK(ofs, "File can not be opened for writing");
	exportSparseData(dataset, ofs);
}
/** @}*/
}
#endif