//===========================================================================
/*!
 *
 *
 * \brief       Clusters defined by centroids.
 *
 *
 *
 * \author      T. Glasmachers
 * \date        2011
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 */

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H #define SHARK_MODELS_CLUSTERING_CENTROIDS_H #include #include #include namespace shark { /// \brief Clusters defined by centroids. /// /// \par /// Centroids are an elementary way to define clusters by means /// of the one-nearest-neighbor rule. This rule defines a hard /// clustering decision. /// /// \par /// The Centroids class uses inverse distances to compute soft /// clustering memberships. This is arbitrary and can be changed /// by overriding the membershipKernel function. /// class Centroids : public AbstractClustering { typedef AbstractClustering base_type; public: /// Default constructor SHARK_EXPORT_SYMBOL Centroids(); /// Constructor /// /// \param centroids number of centroids in the model (initially zero) /// \param dimension dimension of the input space, and thus of the centroids SHARK_EXPORT_SYMBOL Centroids(std::size_t centroids, std::size_t dimension); /// Constructor /// /// \param centroids centroid vectors SHARK_EXPORT_SYMBOL Centroids(Data const& centroids); /// \brief From INameable: return the class name. 
std::string name() const { return "Centroids"; } /// from IParameterizable SHARK_EXPORT_SYMBOL RealVector parameterVector() const; /// from IParameterizable SHARK_EXPORT_SYMBOL void setParameterVector(RealVector const& newParameters); /// from IParameterizable SHARK_EXPORT_SYMBOL std::size_t numberOfParameters() const; /// return the dimension of the inputs Shape inputShape() const{ return dataDimension(m_centroids); } /// return the number of centroids in the model SHARK_EXPORT_SYMBOL std::size_t numberOfClusters() const; /// read access to the centroid vectors Data const& centroids() const{ return m_centroids; } /// overwrite the centroid vectors void setCentroids(Data const& newCentroids){ m_centroids = newCentroids; } /// from ISerializable SHARK_EXPORT_SYMBOL void read(InArchive& archive); /// from ISerializable SHARK_EXPORT_SYMBOL void write(OutArchive& archive) const; /// from AbstractClustering: Compute cluster memberships. SHARK_EXPORT_SYMBOL RealVector softMembership(RealVector const& pattern) const; /// From AbstractClustering: Compute cluster memberships for a batch of patterns. 
SHARK_EXPORT_SYMBOL RealMatrix softMembership(BatchInputType const& patterns) const; /// Computes the distances of each pattern to all cluster centers SHARK_EXPORT_SYMBOL RealMatrix distances(BatchInputType const& patterns) const; /// initialize centroids from labeled data: take the first /// data points with different labels; if there are more /// centroids than classes, the remaining centroids are filled /// with the first elements in the data set /// /// \param data dataset from which to take the centroids /// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set /// \param noClasses number of clases in the dataset, default 0 means that the number is computed SHARK_EXPORT_SYMBOL void initFromData(ClassificationDataset const& data, std::size_t noClusters = 0, std::size_t noClasses = 0); /// initialize centroids from unlabeled data: /// take a random subset of data points /// /// \param dataset dataset from which to take the centroids /// \param noClusters number of centroids in the model SHARK_EXPORT_SYMBOL void initFromData(Data const& dataset, std::size_t noClusters); protected: /// Compute unnormalized membership from distance. /// The default implementation is to return exp(-distance) SHARK_EXPORT_SYMBOL virtual double membershipKernel(double dist) const; /// centroid vectors Data m_centroids; }; } #endif