//=========================================================================== /*! * * * \brief Tree for nearest neighbor search in low dimensions. * * * * \author T. Glasmachers * \date 2011 * * * \par Copyright 1995-2017 Shark Development Team * *

* This file is part of Shark. * * * Shark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Shark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Shark. If not, see . * */ //=========================================================================== #ifndef SHARK_ALGORITHMS_NEARESTNEIGHBORS_KDTREE_H #define SHARK_ALGORITHMS_NEARESTNEIGHBORS_KDTREE_H #include #include #include #include namespace shark { /// /// \brief KD-tree, a binary space-partitioning tree /// /// \par /// KD-tree data structure for efficient nearest /// neighbor searches in low-dimensional spaces. /// Low-dimensional means << 10 dimensions. /// /// \par /// The tree is constructed from data by splitting /// the dimension with largest extent (in the data /// covered, not space represented by the cell), /// recursively. An approximate median is used as /// the cutting point, where the maximal number of /// points used to estimate the median can be /// controlled with the template parameter /// MedianAccuracy. /// /// \par /// The bucket size for the tree is aleays one, /// such that there is a unique point in each leaf /// cell. /// template class KDTree : public BinaryTree { typedef KDTree self_type; typedef BinaryTree base_type; public: /// Construct the tree from data. /// It is assumed that the container exceeds /// the lifetime of the KDTree (which holds /// only references to the points), and that /// the memory locations of the points remain /// unchanged. KDTree(Data const& dataset, TreeConstruction tc = TreeConstruction()) : base_type(dataset.numberOfElements()) , m_cutDim(0xffffffff){ typedef DataView const> PointSet; PointSet points(dataset); //create a list to the iterator elements as temporary storage std::vector::type> elements(m_size); boost::iota(elements,boost::begin(points)); buildTree(tc,elements); //after the creation of the trees, the iterators in the array are sorted in the order, //they are referenced by the nodes.so we can create the indexList using the indizes of the iterators for(std::size_t i = 0; i != m_size; ++i){ mp_indexList[i] = elements[i].index(); } } /// lower bound on the box-shaped /// space represented by this cell double lower(std::size_t dim) const{ self_type* parent = static_cast(mep_parent); if (parent == NULL) return -1e100; if (parent->m_cutDim == dim && parent->mp_right == this) return parent->threshold(); else return parent->lower(dim); } /// upper bound on the box-shaped /// space represented by this cell double upper(std::size_t dim) const{ self_type* parent = static_cast(mep_parent); if (parent == NULL) return +1e100; if (parent->m_cutDim == dim && parent->mp_left == this) return parent->threshold(); else return parent->upper(dim); } /// \par /// Compute the squared Euclidean distance of /// this cell to the given reference point, or /// alternatively a lower bound on this value. /// /// \par /// In the case of the kd-tree the computation /// is exact, however, only a lower bound is /// required in general, and other space /// partitioning trees used in the future may /// only be able to provide a lower bound, at /// least with reasonable computational efforts. double squaredDistanceLowerBound(InputT const& reference) const { double ret = 0.0; for (std::size_t d = 0; d != reference.size(); d++) { double v = reference(d); double l = lower(d); double u = upper(d); if (v < l){ ret += sqr(l-v); } else if (v > u){ ret += sqr(v-u); } } return ret; } #if 0 // debug code, please ignore void print(unsigned int ident = 0) const { if (this->isLeaf()) { for (unsigned int j=0; jindex(j)); } } else { for (unsigned int i=0; ithreshold()); for (unsigned int i=0; isize()); ((self_type*)mp_left)->print(ident + 1); for (unsigned int i=0; isize()); ((self_type*)mp_right)->print(ident + 1); } } #endif protected: using base_type::mep_parent; using base_type::mp_left; using base_type::mp_right; using base_type::mp_indexList; using base_type::m_size; using base_type::m_nodes; /// (internal) construction of a non-root node KDTree(KDTree* parent, std::size_t* list, std::size_t size) : base_type(parent, list, size) , m_cutDim(0xffffffff) { } /// (internal) construction method: /// median-cuts of the dimension with widest spread template void buildTree(TreeConstruction tc, Range& points){ typedef typename boost::range_iterator::type iterator; iterator begin = boost::begin(points); iterator end = boost::end(points); if (tc.maxDepth() == 0 || m_size <= tc.maxBucketSize()){ m_nodes = 1; return; } m_cutDim = calculateCuttingDimension(points); if (m_cutDim == (*begin)->size()){ m_nodes = 1; return; } // calculate the distance of the boundary for every point in the list std::vector distance(m_size); iterator point = begin; for(std::size_t i = 0; i != m_size; ++i,++point){ distance[i] = (**point)[m_cutDim]; } // split the list into sub-cells iterator split = this->splitList(distance,points); std::size_t leftSize = split-begin; // create sub-nodes mp_left = new KDTree(this, mp_indexList, leftSize); mp_right = new KDTree(this, mp_indexList + leftSize, m_size - leftSize); // recurse boost::iterator_range left(begin,split); boost::iterator_range right(split,end); ((KDTree*)mp_left)->buildTree(tc.nextDepthLevel(), left); ((KDTree*)mp_right)->buildTree(tc.nextDepthLevel(), right); m_nodes = 1 + mp_left->nodes() + mp_right->nodes(); } ///\brief Calculate the dimension which should be cut by this node /// ///The KD-Tree calculates the Axis-Aligned-Bounding-Box surrounding the points ///and splits the longest dimension template std::size_t calculateCuttingDimension(Range const& points)const{ typedef typename boost::range_iterator::type iterator; iterator begin = boost::begin(points); // iterator end = boost::end(points); // calculate bounding box of the data InputT L = **begin; InputT U = **begin; std::size_t dim = L.size(); iterator point = begin; ++point; for (std::size_t i=1; i != m_size; ++i,++point){ for (std::size_t d = 0; d != dim; d++){ double v = (**point)[d]; if (v < L[d]) L[d] = v; if (v > U[d]) U[d] = v; } } // find the longest edge of the bounding box std::size_t cutDim = 0; double extent = U[0] - L[0]; for (std::size_t d = 1; d != dim; d++) { double e = U[d] - L[d]; if (e > extent) { extent = e; cutDim = d; } } if(extent == 0) return dim; return cutDim; } /// Function describing the separating hyperplane /// as its zero set. It is guaranteed that the /// gradient has norm one, thus the absolute value /// of the function indicates distance from the /// hyperplane. double funct(InputT const& reference) const{ return reference[m_cutDim]; } /// split/cut dimension of this node std::size_t m_cutDim; }; } #endif