//===========================================================================
/*!
 *
 *
 * \brief   Support for importing and exporting data from and to sparse data (libSVM) formatted data files
 *
 *
 * \par
 * The most important application of the methods provided in this
 * file is the import of data from LIBSVM files to Shark Data containers.
 *
 *
 *
 *
 * \author      M. Tuma, T. Glasmachers, C. Igel
 * \date        2010-2016
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 *
 * This file is part of Shark.
 *
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_DATA_SPARSEDATA_H
#define SHARK_DATA_SPARSEDATA_H

// NOTE(review): this copy of the header appears to have lost every piece of
// text that stood between a '<' and the following '>'. Visible symptoms are
// the four include directives below without a target, 'template' keywords
// without a parameter list, 'LabeledData' without template arguments, and two
// loops that stop right after 'b'. The code tokens below are kept exactly as
// found; restore the missing parts from the upstream header before compiling.
#include
#include
#include
#include

namespace shark {

/**
 * \ingroup shark_globals
 *
 * @{
 */

// NOTE(review): the two declarations following each doc comment are textually
// identical in this copy. Presumably they differed only in the (stripped)
// template arguments of LabeledData -- TODO confirm against upstream.

/// \brief Import classification data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  stream        stream to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import regression data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  stream        stream to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import classification data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  stream        stream to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import regression data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  stream        stream to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::istream& stream,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import classification data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  fn            the file to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import regression data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  fn            the file to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import classification data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  fn            the file to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

/// \brief Import regression data from a sparse data (libSVM) file.
///
/// \param  dataset       container storing the loaded data
/// \param  fn            the file to be read from
/// \param  highestIndex  highest feature index, or 0 for auto-detection
/// \param  batchSize     size of batch
SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);

SHARK_EXPORT_SYMBOL void importSparseData(
	LabeledData& dataset,
	std::string fn,
	unsigned int highestIndex = 0,
	std::size_t batchSize = LabeledData::DefaultBatchSize
);


/// \brief Export classification data to sparse data (libSVM) format.
///
/// \param  dataset     Container storing the data
/// \param  stream      Output stream
/// \param  oneMinusOne Flag for applying the transformation y<-2y-1 to binary labels
/// \param  sortLabels  Flag for sorting data points according to labels
template
void exportSparseData(LabeledData const& dataset, std::ostream& stream, bool oneMinusOne = true, bool sortLabels = false)
{
	// The y<-2y-1 mapping is switched off unless the data set has exactly two classes.
	if (numberOfClasses(dataset) != 2) oneMinusOne = false;

	// NOTE(review): the template arguments of KeyValuePair are missing here.
	std::vector< KeyValuePair > > order;
	for (std::size_t b=0; b
	// NOTE(review): the source breaks off at this point. The rest of this
	// loop, the remainder of the function body, and the doc comment plus
	// 'template' header of the overload below are missing in this copy.
	// Nothing about the lost statements can be read off this file.

// File-name overload taking the same oneMinusOne / sortLabels flags as the
// stream overload above, plus an 'append' flag.
// It opens 'fn' for writing, verifies the stream with SHARK_RUNTIME_CHECK and
// forwards dataset, oneMinusOne and sortLabels to the std::ostream overload.
void exportSparseData(LabeledData const& dataset, const std::string &fn, bool oneMinusOne = true, bool sortLabels = false, bool append = false)
{
	std::ofstream ofs;

	// shall we append only or overwrite?
	if (append == true) {
		// append == true: open with the std::fstream::app flag
		ofs.open (fn.c_str(), std::fstream::out | std::fstream::app );
	} else {
		// otherwise: open with the default std::ofstream mode
		ofs.open (fn.c_str());
	}
	// The stream state is checked before any data is written.
	SHARK_RUNTIME_CHECK(ofs, "File can not be opened for writing");

	exportSparseData(dataset, ofs, oneMinusOne, sortLabels);
}

/// \brief Export regression data to sparse data (libSVM) format.
///
/// \param  dataset     Container storing the data
/// \param  stream      Output stream
template
void exportSparseData(LabeledData const& dataset, std::ostream& stream)
{
	for (std::size_t b=0; b
	// NOTE(review): same truncation as in the classification overload above.
	// The loop condition, the loop body, the end of this function and the
	// doc comment plus 'template' header of the next overload are missing.

// File-name overload without label flags: opens 'fn' for writing (appending
// when 'append' is true), verifies the stream with SHARK_RUNTIME_CHECK and
// forwards to the two-argument std::ostream overload.
void exportSparseData(LabeledData const& dataset, const std::string &fn, bool append = false)
{
	std::ofstream ofs;

	// shall we append only or overwrite?
	if (append == true) {
		// append == true: open with the std::fstream::app flag
		ofs.open (fn.c_str(), std::fstream::out | std::fstream::app );
	} else {
		// otherwise: open with the default std::ofstream mode
		ofs.open (fn.c_str());
	}
	// The stream state is checked before any data is written.
	SHARK_RUNTIME_CHECK(ofs, "File can not be opened for writing");

	exportSparseData(dataset, ofs);
}

/** @}*/

}
#endif