mlpack  2.0.1
rectangle_tree.hpp
Go to the documentation of this file.
1 
15 #ifndef __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_HPP
16 #define __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_HPP
17 
18 #include <mlpack/core.hpp>
19 
20 #include "../hrectbound.hpp"
21 #include "../statistic.hpp"
22 #include "r_tree_split.hpp"
24 
25 namespace mlpack {
26 namespace tree {
27 
45 template<typename MetricType = metric::EuclideanDistance,
46  typename StatisticType = EmptyStatistic,
47  typename MatType = arma::mat,
48  typename SplitType = RTreeSplit,
49  typename DescentType = RTreeDescentHeuristic>
51 {
52  // The metric *must* be the euclidean distance.
53  static_assert(boost::is_same<MetricType, metric::EuclideanDistance>::value,
54  "RectangleTree: MetricType must be metric::EuclideanDistance.");
55 
56  public:
61  typedef struct SplitHistoryStruct
62  {
64  std::vector<bool> history;
65 
66  SplitHistoryStruct(int dim) : lastDimension(0), history(dim)
67  {
68  for (int i = 0; i < dim; i++)
69  history[i] = false;
70  }
71 
72  template<typename Archive>
73  void Serialize(Archive& ar, const unsigned int /* version */)
74  {
75  ar & data::CreateNVP(lastDimension, "lastDimension");
76  ar & data::CreateNVP(history, "history");
77  }
79 
80  private:
86  size_t numChildren;
88  std::vector<RectangleTree*> children;
95  size_t begin;
98  size_t count;
100  size_t maxLeafSize;
102  size_t minLeafSize;
106  StatisticType stat;
112  const MatType* dataset;
117  std::vector<size_t> points;
119  MatType* localDataset;
120 
121  public:
123  typedef MatType Mat;
124 
127  template<typename RuleType>
130  template<typename RuleType>
131  class DualTreeTraverser;
132 
147  RectangleTree(const MatType& data,
148  const size_t maxLeafSize = 20,
149  const size_t minLeafSize = 8,
150  const size_t maxNumChildren = 5,
151  const size_t minNumChildren = 2,
152  const size_t firstDataIndex = 0);
153 
168  RectangleTree(MatType&& data,
169  const size_t maxLeafSize = 20,
170  const size_t minLeafSize = 8,
171  const size_t maxNumChildren = 5,
172  const size_t minNumChildren = 2,
173  const size_t firstDataIndex = 0);
174 
182  explicit RectangleTree(RectangleTree* parentNode);
183 
191  RectangleTree(const RectangleTree& other, const bool deepCopy = true);
192 
196  template<typename Archive>
198  Archive& ar,
199  const typename boost::enable_if<typename Archive::is_loading>::type* = 0);
200 
206  ~RectangleTree();
207 
213  void SoftDelete();
214 
218  void NullifyData();
219 
227  void InsertPoint(const size_t point);
228 
239  void InsertPoint(const size_t point, std::vector<bool>& relevels);
240 
252  void InsertNode(RectangleTree* node,
253  const size_t level,
254  std::vector<bool>& relevels);
255 
264  bool DeletePoint(const size_t point);
265 
275  bool DeletePoint(const size_t point, std::vector<bool>& relevels);
276 
281  bool RemoveNode(const RectangleTree* node, std::vector<bool>& relevels);
282 
294  const RectangleTree* FindByBeginCount(size_t begin, size_t count) const;
295 
307  RectangleTree* FindByBeginCount(size_t begin, size_t count);
308 
310  const bound::HRectBound<MetricType>& Bound() const { return bound; }
313 
315  const StatisticType& Stat() const { return stat; }
317  StatisticType& Stat() { return stat; }
318 
320  const SplitHistoryStruct& SplitHistory() const { return splitHistory; }
323 
325  bool IsLeaf() const;
326 
328  size_t MaxLeafSize() const { return maxLeafSize; }
330  size_t& MaxLeafSize() { return maxLeafSize; }
331 
333  size_t MinLeafSize() const { return minLeafSize; }
335  size_t& MinLeafSize() { return minLeafSize; }
336 
338  size_t MaxNumChildren() const { return maxNumChildren; }
340  size_t& MaxNumChildren() { return maxNumChildren; }
341 
343  size_t MinNumChildren() const { return minNumChildren; }
345  size_t& MinNumChildren() { return minNumChildren; }
346 
348  RectangleTree* Parent() const { return parent; }
350  RectangleTree*& Parent() { return parent; }
351 
353  const MatType& Dataset() const { return *dataset; }
355  MatType& Dataset() { return const_cast<MatType&>(*dataset); }
356 
358  const std::vector<size_t>& Points() const { return points; }
360  std::vector<size_t>& Points() { return points; }
361 
363  const MatType& LocalDataset() const { return *localDataset; }
365  MatType& LocalDataset() { return *localDataset; }
366 
368  MetricType Metric() const { return MetricType(); }
369 
371  void Center(arma::vec& center) { bound.Center(center); }
372 
374  size_t NumChildren() const { return numChildren; }
376  size_t& NumChildren() { return numChildren; }
377 
379  const std::vector<RectangleTree*>& Children() const { return children; }
381  std::vector<RectangleTree*>& Children() { return children; }
382 
387  double FurthestPointDistance() const;
388 
396  double FurthestDescendantDistance() const;
397 
401  double MinimumBoundDistance() const { return bound.MinWidth() / 2.0; }
402 
405  double ParentDistance() const { return parentDistance; }
408  double& ParentDistance() { return parentDistance; }
409 
415  inline RectangleTree& Child(const size_t child) const
416  {
417  return *children[child];
418  }
419 
425  inline RectangleTree& Child(const size_t child)
426  {
427  return *children[child];
428  }
429 
432  size_t NumPoints() const;
433 
439  size_t NumDescendants() const;
440 
448  size_t Descendant(const size_t index) const;
449 
458  size_t Point(const size_t index) const;
459 
461  double MinDistance(const RectangleTree* other) const
462  {
463  return bound.MinDistance(other->Bound());
464  }
465 
467  double MaxDistance(const RectangleTree* other) const
468  {
469  return bound.MaxDistance(other->Bound());
470  }
471 
474  {
475  return bound.RangeDistance(other->Bound());
476  }
477 
479  template<typename VecType>
480  double MinDistance(const VecType& point,
481  typename boost::enable_if<IsVector<VecType> >::type* = 0)
482  const
483  {
484  return bound.MinDistance(point);
485  }
486 
488  template<typename VecType>
489  double MaxDistance(const VecType& point,
490  typename boost::enable_if<IsVector<VecType> >::type* = 0)
491  const
492  {
493  return bound.MaxDistance(point);
494  }
495 
497  template<typename VecType>
499  const VecType& point,
500  typename boost::enable_if<IsVector<VecType> >::type* = 0) const
501  {
502  return bound.RangeDistance(point);
503  }
504 
508  size_t TreeSize() const;
509 
514  size_t TreeDepth() const;
515 
517  size_t Begin() const { return begin; }
519  size_t& Begin() { return begin; }
520 
522  size_t Count() const { return count; }
524  size_t& Count() { return count; }
525 
527  static bool HasSelfChildren() { return false; }
528 
529  private:
534  RectangleTree(const size_t begin,
535  const size_t count,
537  StatisticType stat,
538  const int maxLeafSize = 20) :
539  begin(begin),
540  count(count),
541  bound(bound),
542  stat(stat),
543  maxLeafSize(maxLeafSize) { }
544 
546  {
547  return new RectangleTree(begin, count, bound, stat, maxLeafSize);
548  }
549 
555  void SplitNode(std::vector<bool>& relevels);
556 
557  protected:
564  RectangleTree();
565 
567  friend class boost::serialization::access;
568 
569  public:
581  void CondenseTree(const arma::vec& point,
582  std::vector<bool>& relevels,
583  const bool usePoint);
584 
592  bool ShrinkBoundForPoint(const arma::vec& point);
593 
601  bool ShrinkBoundForBound(const bound::HRectBound<MetricType>& changedBound);
602 
607 
611  template<typename Archive>
612  void Serialize(Archive& ar, const unsigned int /* version */);
613 };
614 
615 } // namespace tree
616 } // namespace mlpack
617 
618 // Include implementation.
619 #include "rectangle_tree_impl.hpp"
620 
621 #endif
SplitHistoryStruct splitHistory
A struct to store the "split history" for X trees.
double parentDistance
The distance from the centroid of this node to the centroid of the parent.
double MinDistance(const VecType &point, typename boost::enable_if< IsVector< VecType > >::type *=0) const
Return the minimum distance to another point.
RectangleTree * ExactClone()
Make an exact copy of this node, pointers and everything.
size_t & Begin()
Modify the index of the beginning point of this subset.
const MatType & LocalDataset() const
Get the local dataset of this node.
~RectangleTree()
Deletes this node, deallocating the memory for the children and calling their destructors in turn...
double MinWidth() const
Get the minimum width of the bound.
Definition: hrectbound.hpp:101
MetricType Metric() const
Get the metric which the tree uses.
RectangleTree & Child(const size_t child) const
Get the specified child.
Linear algebra utility functions, generally performed on matrices or vectors.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename boost::enable_if< HasSerialize< T >>::type *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
size_t & MinLeafSize()
Modify the minimum leaf size.
size_t & MaxLeafSize()
Modify the maximum leaf size.
bool IsLeaf() const
Return whether or not this node is a leaf (true if it has no children).
const bound::HRectBound< MetricType > & Bound() const
Return the bound object for this node.
LMetric< 2, true > EuclideanDistance
The Euclidean (L2) distance.
Definition: lmetric.hpp:113
math::Range RangeDistance(const RectangleTree *other) const
Return the minimum and maximum distance to another node.
const MatType * dataset
The dataset.
RectangleTree * parent
The parent node (NULL if this is the root of the tree).
RectangleTree(const size_t begin, const size_t count, bound::HRectBound< MetricType > bound, StatisticType stat, const int maxLeafSize=20)
Private copy constructor, available only to fill (pad) the tree to a specified level.
void CondenseTree(const arma::vec &point, std::vector< bool > &relevels, const bool usePoint)
Condense the bounding rectangles for this node based on the removal of the point specified by the arm...
void SplitNode(std::vector< bool > &relevels)
Splits the current node, recursing up the tree.
bool DeletePoint(const size_t point)
Deletes a point in the tree.
SplitHistoryStruct & SplitHistory()
Modify the split history object of this node.
size_t MaxNumChildren() const
Return the maximum number of children (in a non-leaf node).
RectangleTree()
A default constructor.
size_t maxNumChildren
The max number of child nodes a non-leaf node can have.
std::vector< size_t > points
The mapping to the dataset.
math::Range RangeDistance(const HRectBound &other) const
Calculates minimum and maximum bound-to-bound distance.
size_t Count() const
Return the number of points in this subset.
size_t Begin() const
Return the index of the beginning point of this subset.
const StatisticType & Stat() const
Return the statistic object for this node.
size_t NumDescendants() const
Return the number of descendants of this node.
size_t MaxLeafSize() const
Return the maximum leaf size.
void InsertPoint(const size_t point)
Inserts a point into the tree.
void InsertNode(RectangleTree *node, const size_t level, std::vector< bool > &relevels)
Inserts a node into the tree, tracking which levels have been inserted into.
The X tree requires that the tree records it&#39;s "split history".
double MinimumBoundDistance() const
Return the minimum distance from the center to any edge of the bound.
size_t TreeSize() const
Obtains the number of nodes in the tree, starting with this.
MatType * localDataset
The local dataset.
double & ParentDistance()
Modify the distance from the center of this node to the center of the parent node.
RectangleTree *& Parent()
Modify the parent of this node.
bool ownsDataset
Whether or not we are responsible for deleting the dataset.
size_t minNumChildren
The minimum number of child nodes a non-leaf node can have.
size_t NumPoints() const
Return the number of points in this node (returns 0 if this node is not a leaf).
size_t & NumChildren()
Modify the number of child nodes. Be careful.
size_t count
The number of points in the dataset contained in this node (and its children).
std::vector< size_t > & Points()
Modify the points vector for this node. Be careful!
bool ShrinkBoundForPoint(const arma::vec &point)
Shrink the bound object of this node for the removal of a point.
double ParentDistance() const
Return the distance from the center of this node to the center of the parent node.
size_t & MinNumChildren()
Modify the minimum number of children (in a non-leaf node).
Simple real-valued range.
Definition: range.hpp:21
bool RemoveNode(const RectangleTree *node, std::vector< bool > &relevels)
Removes a node from the tree.
size_t minLeafSize
The minimum leaf size.
const RectangleTree * FindByBeginCount(size_t begin, size_t count) const
Find a node in this tree by its begin and count (const).
A rectangle type tree tree, such as an R-tree or X-tree.
MatType Mat
So other classes can use TreeType::Mat.
double MaxDistance(const VecType &point, typename boost::enable_if< IsVector< VecType > > *=0) const
Calculates maximum bound-to-point squared distance.
math::Range RangeDistance(const VecType &point, typename boost::enable_if< IsVector< VecType > >::type *=0) const
Return the minimum and maximum distance to another point.
double MinDistance(const VecType &point, typename boost::enable_if< IsVector< VecType > > *=0) const
Calculates minimum bound-to-point distance.
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen docu...
size_t & MaxNumChildren()
Modify the maximum number of children (in a non-leaf node).
RectangleTree & Child(const size_t child)
Modify the specified child.
StatisticType & Stat()
Modify the statistic object for this node.
A dual tree traverser for rectangle type trees.
size_t & Count()
Modify the number of points in this subset.
void Center(arma::vec &center)
Get the centroid of the node and store it in the given vector.
const std::vector< size_t > & Points() const
Get the points vector for this node.
double MaxDistance(const VecType &point, typename boost::enable_if< IsVector< VecType > >::type *=0) const
Return the maximum distance to another point.
size_t begin
The index of the first point in the dataset contained in this node (and its children).
void SoftDelete()
Delete this node of the tree, but leave the stuff contained in it intact.
double FurthestDescendantDistance() const
Return the furthest possible descendant distance.
double MinDistance(const RectangleTree *other) const
Return the minimum distance to another node.
void Serialize(Archive &ar, const unsigned int)
size_t maxLeafSize
The max leaf size.
MatType & Dataset()
Modify the dataset which the tree is built on. Be careful!
std::vector< RectangleTree * > children
The child nodes (Starting at 0 and ending at (numChildren-1) ).
size_t MinNumChildren() const
Return the minimum number of children (in a non-leaf node).
bound::HRectBound< metric::EuclideanDistance > bound
The bound object for this node.
void Center(arma::vec &center) const
Calculates the center of the range, placing it into the given vector.
A single traverser for rectangle type trees.
void NullifyData()
Set dataset to null.
MatType & LocalDataset()
Modify the local dataset of this node.
size_t Point(const size_t index) const
Return the index (with reference to the dataset) of a particular point in this node.
RectangleTree * Parent() const
Gets the parent of this node.
bool ShrinkBoundForBound(const bound::HRectBound< MetricType > &changedBound)
Shrink the bound object of this node for the removal of a child node.
size_t NumChildren() const
Return the number of child nodes. (One level beneath this one only.)
size_t MinLeafSize() const
Return the minimum leaf size.
double MaxDistance(const RectangleTree *other) const
Return the maximum distance to another node.
size_t TreeDepth() const
Obtains the number of levels below this node in the tree, starting with this.
static bool HasSelfChildren()
Returns false: this tree type does not have self children.
std::vector< RectangleTree * > & Children()
Modify the children of this node.
size_t Descendant(const size_t index) const
Return the index (with reference to the dataset) of a particular descendant of this node...
const MatType & Dataset() const
Get the dataset which the tree is built on.
bound::HRectBound< MetricType > & Bound()
Modify the bound object for this node.
const std::vector< RectangleTree * > & Children() const
Get the children of this node.
double FurthestPointDistance() const
Return the furthest distance to a point held in this node.
If value == true, then VecType is some sort of Armadillo vector or subview.
Definition: arma_traits.hpp:37
StatisticType stat
Any extra data contained in the node.
size_t numChildren
The number of child nodes actually in use (0 if this is a leaf node).
const SplitHistoryStruct & SplitHistory() const
Return the split history object of this node.