//=========================================================================== /*! * * * \brief Super class for clustering definitions. * * * * \author T. Glasmachers * \date 2011 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H #define SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H #include #include #include #include #include #include namespace shark { /// /// \brief Base class for clustering. /// /// \par /// Clustering algorithms vary widely in the data structures /// on which they operate. For example, simple centroid-based /// approaches such as k-means are mutually incompatible with /// tree-based hierarchical clustering. This interface class /// is the attempt to cast different cluster descriptions into /// a minimal common interface that is useful for prediction. /// /// \par /// There are at least two common types of predictions made /// with clusterings. The first one is the assignment of the /// best matching cluster to a patters, such as in vector /// quantization or unsupervised clustering. This is often /// referred to as "hard clustering". The second one is the /// computation of a membership function ("soft clustering"). /// The membership of a pattern to a cluster is non-negative, /// and the total membership should add to one. /// /// \par /// This interface makes minimal assumptions to allow for /// these types of predictions. It assumes that clusters can /// be enumbered (identified by an index), and that at least /// a membership test can be made (hard clustering). It is /// optional to provide a membership function. Only one of /// the two interfaces for best matching cluster or membership /// function need to be implemented, since the best matching /// cluster can be deduced from the membership function. /// However, often the best matching cluster can be computed /// more efficiently than the membership function. In these /// cases both interface functions should be implemented. /// /// \par /// The purpose of this interface is to act as a common /// super class to different data structures describing the /// outcome of a clustering operation. The interface allows /// all of these data structures to be used in the two /// clustering model classes: HardClusteringModel and /// SoftClusteringModel. /// template class AbstractClustering : public INameable, public IParameterizable<>, public ISerializable { public: typedef InputT InputType; typedef unsigned int OutputType; typedef typename Batch::type BatchInputType; typedef Batch::type BatchOutputType; enum Feature { HAS_SOFT_MEMBERSHIP = 1, }; SHARK_FEATURE_INTERFACE; /// Tests whether the clustering can compute a (soft) /// member ship function, describing the membership /// of a sample to the different clusters. bool hasSoftMembershipFunction()const{ return m_features & HAS_SOFT_MEMBERSHIP; } /// return the number of clusters virtual std::size_t numberOfClusters() const = 0; virtual Shape inputShape() const = 0; /// \brief Compute best matching cluster. /// /// \par /// This function should be overriden by sub-classes to /// compute the cluster best matching the input pattern. /// The (typically slow) default implementation is to /// create a batch of size 1 and return the result of the batch call to hardMembership virtual unsigned int hardMembership(InputType const& pattern) const{ typename Batch::type b = Batch::createBatch(pattern); getBatchElement(b,0) = pattern; return hardMembership(b)(0); } /// \brief Compute best matching cluster for a batch of inputs. /// /// \par /// This function should be overriden by sub-classes to /// compute the cluster best matching the input pattern. /// The (typically slow) default implementation is to /// return the arg max of the soft membership function for every pattern. virtual BatchOutputType hardMembership(BatchInputType const& patterns) const{ std::size_t numPatterns = batchSize(patterns); RealMatrix f = softMembership(patterns); SHARK_ASSERT(f.size2() > 0); SHARK_ASSERT(f.size1() == numPatterns); BatchOutputType outputs(numPatterns); for(std::size_t i = 0; i != numPatterns;++i){ auto membership = row(f,i); outputs(i) = (unsigned int)(std::max_element(membership.begin(),membership.end())-membership.begin()); } return outputs; } /// \brief Compute cluster membership function. /// /// \par /// This function should be overriden by sub-classes to /// compute the membership of a pattern to the clusters. /// The default implementation creates a batch of size 1 /// and calls the batched version. If this is not overriden, an xception is thrown. virtual RealVector softMembership(InputType const& pattern) const{ auto output = softMembership(Batch::createBatch(pattern)); return row(output,0); } /// \brief Compute cluster membership function. /// /// \par /// This function should be overriden by sub-classes to /// compute the membership of a pattern to the clusters. /// This default implementation throws an exception. virtual RealMatrix softMembership(BatchInputType const& patterns) const{ SHARK_FEATURE_EXCEPTION(HAS_SOFT_MEMBERSHIP); } /// empty default implementation of ISerializable::read void read(InArchive& archive) {} /// empty default implementation of ISerializable::write void write(OutArchive& archive) const {} }; } #endif