//=========================================================================== /*! * * * \brief This will relabel a given dataset to have labels 0..N-1 (and vice versa) * * * * \author Aydin Demircioglu * \date 2014 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_LABELORDER_H #define SHARK_LABELORDER_H #include #include #include namespace shark { /// \brief This will normalize the labels of a given dataset to 0..N-1 /// /// \par This will normalize the labels of a given dataset to 0..N-1 /// and store the ordering in a member variable. /// After processing, the dataset will afterwards have labels ranging /// from 0 to N-1, with N the number of classes, so usual Shark /// trainers can work with it. /// One can then revert the original labeling just by calling restoreOriginalLabels class LabelOrder : public INameable { private: public: LabelOrder() {}; virtual ~LabelOrder() {}; /// \brief From INameable: return the class name. std::string name() const { return "LabelOrder"; } /// \brief This will normalize the labels and store the ordering in the /// member variables. The dataset will afterwards have labels ranging /// from 0 to N-1, with N the number of classes. /// This will overwrite any previously stored label ordering in the object. /// /// \param[in,out] dataset dataset that will be relabeled void normalizeLabels(LabeledData &dataset) { // determine the min and max labels of the given dataset unsigned int minLabel = std::numeric_limits::max(); unsigned int maxLabel = 0; for(std::size_t i = 0; i < dataset.numberOfElements(); ++i) { unsigned int label = dataset.labels().element(i); if(label < minLabel) minLabel = label; if(label > maxLabel) maxLabel = label; } // now we create an vector that can hold the label ordering m_labelOrder.clear(); // and one array that tracks what we already encountered unsigned int maxval = std::numeric_limits::max(); std::vector foundLabels(maxLabel - minLabel + 1, maxval); // and insert all labels we encounter unsigned int currentPosition = 0; for(std::size_t i = 0; i < dataset.numberOfElements(); i++) { // is it a new label? unsigned int label = dataset.labels().element(i); if(foundLabels[label - minLabel] == maxval) { foundLabels[label - minLabel] = currentPosition; m_labelOrder.push_back(label); currentPosition++; } } // now map every label for(std::size_t i = 0; i < dataset.numberOfElements(); i++) { unsigned int label = dataset.labels().element(i); dataset.labels().element(i) = foundLabels[label - minLabel]; } } /// \brief This will restore the original labels of the dataset. This /// must be called with data compatible the original dataset, so that the labels will /// fit. The label ordering will not be destroyed after calling this function, so /// it can be called multiple times, e.g. to testsets or similar data. /// /// \param[in,out] dataset dataset to relabel (restore labels) void restoreOriginalLabels(LabeledData &dataset) { // now map every label for(std::size_t i = 0; i < dataset.numberOfElements(); ++i) { unsigned int label = dataset.labels().element(i); // check if the reordering fit the data SHARK_RUNTIME_CHECK(label < m_labelOrder.size(),"Dataset labels does not fit to the stored ordering!"); // relabel label = m_labelOrder[label]; dataset.labels().element(i) = label; } } /// \brief Get label ordering directly /// /// \param[out] labelOrder vector to store the current label order. void getLabelOrder(std::vector& labelOrder) { labelOrder = m_labelOrder; } /// \brief Set label ordering directly /// /// \param[in] labelOrder vector with the new label order void setLabelOrder(std::vector const& labelOrder) { m_labelOrder = labelOrder; } protected: std::vector m_labelOrder; }; } #endif