//=========================================================================== /*! * * * \brief Tree for nearest neighbor search in data with low embedding dimension. * * * * \author T. Glasmachers * \date 2011 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_ALGORITHMS_NEARESTNEIGHBORS_LCTREE_H #define SHARK_ALGORITHMS_NEARESTNEIGHBORS_LCTREE_H #include #include #include #include namespace shark { /// /// \brief LC-tree, a binary space-partitioning tree /// /// \par /// LC-tree data structure for efficient nearest /// neighbor searches in possibly high-dimensional /// spaces, but with low-dimensional effective data /// dimension (means << 10 dimensions). /// "LC" stands for Linear Cut. /// /// This tree requires the existence of a function /// inner_prod computing the standard inner product /// of two objects of type VectorType, and a function /// distanceSqr computing the squared Euclidean /// distance between two vectors. /// /// \par /// The tree is constructed from data by splitting /// the direction with largest extent (in the data /// covered, not space represented by the cell), /// recursively. Approximate direction and median /// are used to determine the cutting hyperplane, /// where the maximal number of points used to /// estimate the median can be controlled with the /// template parameter CuttingAccuracy. /// /// \par /// The bucket size for the tree is always one, /// such that there is a unique point in each leaf /// cell. /// template class LCTree : public BinaryTree { typedef BinaryTree base_type; public: /// Construct the tree from data. /// It is assumed that the container exceeds /// the lifetime of the LCTree (which holds /// only references to the points), and that /// the memory locations of the points remain /// unchanged. LCTree(Data const& dataset, TreeConstruction tc = TreeConstruction()) : base_type(dataset.numberOfElements()) , m_normal(dataDimension(dataset)){ typedef DataView const> PointSet; PointSet points(dataset); //create a list to the iterator elements as temporary storage //we need indexed operators to have a fast lookup of the position of the elements in the container typedef IndexedIterator::type> iterator; std::vector elements(m_size); boost::iota(elements,iterator(boost::begin(points),0)); buildTree(tc,elements); //after the creation of the trees, the iterators in the array are sorted in the order, //they are referenced by the nodes.so we can create the indexList using the indizes of the iterators for(std::size_t i = 0; i != m_size; ++i){ mp_indexList[i] = elements[i].index(); } } /// \par /// Compute the squared Euclidean distance of /// this cell to the given reference point, or /// alternatively a lower bound on this value. double squaredDistanceLowerBound(VectorType const& reference) const{ double dist = 0.0; LCTree const* t = this; LCTree const* p = (LCTree const*)mep_parent; while (p != NULL) { double v = p->distanceFromPlane(reference); if (t == p->mp_right) v = -v; if (v > dist) dist = v; t = p; p = (LCTree const*)p->mep_parent; } return dist * dist; } protected: using base_type::mep_parent; using base_type::mp_left; using base_type::mp_right; using base_type::mp_indexList; using base_type::m_size; using base_type::m_nodes; /// (internal) construction of a non-root node LCTree(LCTree* parent, std::size_t* list, std::size_t size) : base_type(parent, list, size){} /// (internal) construction method: /// median-cuts of the dimension with widest spread template void buildTree(TreeConstruction tc, Range& points){ typedef typename Range::value_type pointIterator; typedef typename Range::iterator iterator; //check whether we are finished if (tc.maxDepth() == 0 || m_size <= tc.maxBucketSize()) { m_nodes = 1; return; } // use only a subset of size at most CuttingAccuracy // to estimate the vector along the longest // distance if (m_size <= CuttingAccuracy){ calculateNormal(points); } else{ boost::array samples; for(std::size_t i = 0; i != CuttingAccuracy; i++) samples[i] = points[m_size * (2*i+1) / (2*CuttingAccuracy)]; calculateNormal(samples); } //calculate the distance from the plane for every point in the list std::vector distance(m_size); for(std::size_t i = 0; i != m_size; ++i){ distance[i] = inner_prod(m_normal, *points[i]); } // split the list into sub-cells iterator split = this->splitList(distance,points); iterator begin = boost::begin(points); iterator end = boost::end(points); if (split == end) {//can't split points. m_nodes = 1; return; } // create sub-nodes std::size_t leftSize = split-begin; mp_left = new LCTree(this, mp_indexList, leftSize); mp_right = new LCTree(this, mp_indexList + leftSize, m_size - leftSize); // recurse boost::iterator_range left(begin,split); boost::iterator_range right(split,end); ((LCTree*)mp_left)->buildTree(tc.nextDepthLevel(),left); ((LCTree*)mp_right)->buildTree(tc.nextDepthLevel(),right); m_nodes = 1 + mp_left->nodes() + mp_right->nodes(); } /// function describing the separating hyperplane double funct(VectorType const& reference) const{ return inner_prod(m_normal, reference); } //find the longest distance between vectors in the sample set and calculate //the normal along this direction template void calculateNormal(Range const& samples){ std::size_t numSamples = samples.size(); std::size_t besti = 0; std::size_t bestj = 0; double best_dist2 = -1.0; for (std::size_t i = 1; i != numSamples; i++){ for (std::size_t j = 0; j != i; j++){ double dist2 = distanceSqr(*samples[i], *samples[j]); if (dist2 > best_dist2){ best_dist2 = dist2; besti = i; bestj = j; } } } double factor = 1.0 / std::sqrt(best_dist2); if (! (boost::math::isfinite)(factor)) factor = 1.0; m_normal = factor * (*samples[besti] - *samples[bestj]); } /// split/cut normal vector of this node VectorType m_normal; }; } #endif