How to declare/use an `unordered_set` for triplets (`tuple`) using custom comparator?

172 Views Asked by At

How to declare/use an unordered_set for triplets (tuple) using custom comparator?

I need to store triplets of float (handled as tuple) in a set to check for potential duplicates. As it's about float, I guess using regular compare with == will not work so customizing compare is needed.

This minimal code doesn't compile:

>> cat unordered_set_triplet.cpp 
#include <unordered_set>
#include <tuple>
#include <limits> // numeric_limits
#include <cmath> // fabs
#include <functional> // hash

using triplet = std::tuple<float, float, float>;
bool triplet_equal(triplet const & lhs, triplet const & rhs) {
  float const eps = std::numeric_limits<float>::epsilon();
  if (std::fabs(std::get<0>(lhs) - std::get<0>(rhs)) > eps) return false;
  if (std::fabs(std::get<1>(lhs) - std::get<1>(rhs)) > eps) return false;
  if (std::fabs(std::get<2>(lhs) - std::get<2>(rhs)) > eps) return false;
  return true;
}
using unordered_set_triplet = std::unordered_set<triplet,
                                                 std::hash<triplet>,
                                                 decltype(triplet_equal)>;

int main() {
  //unordered_set_triplet s; // Compilation: KO...
  unordered_set_triplet s(10, std::hash<triplet>, triplet_equal);
  s.insert({1.f, 2.f, 3.f});
}

I get:

>> g++ -std=c++20 -o unordered_set_triplet unordered_set_triplet.cpp 
In file included from /usr/include/c++/12/bits/hashtable.h:35,
                 from /usr/include/c++/12/unordered_set:46,
                 from unordered_set_triplet.cpp:1:
/usr/include/c++/12/bits/hashtable_policy.h: In instantiation of ‘struct std::__detail::_Hashtable_ebo_helper<0, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), false>’:
/usr/include/c++/12/bits/hashtable_policy.h:1631:12:   required from ‘struct std::__detail::_Hashtable_base<std::tuple<float, float, float>, std::tuple<float, float, float>, std::__detail::_Identity, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), std::hash<std::tuple<float, float, float> >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Hashtable_traits<true, true, true> >’
/usr/include/c++/12/bits/hashtable.h:182:11:   required from ‘class std::_Hashtable<std::tuple<float, float, float>, std::tuple<float, float, float>, std::allocator<std::tuple<float, float, float> >, std::__detail::_Identity, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), std::hash<std::tuple<float, float, float> >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true> >’
/usr/include/c++/12/bits/unordered_set.h:100:18:   required from ‘class std::unordered_set<std::tuple<float, float, float>, std::hash<std::tuple<float, float, float> >, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&)>’
unordered_set_triplet.cpp:21:26:   required from here
/usr/include/c++/12/bits/hashtable_policy.h:1204:11: error: data member ‘std::__detail::_Hashtable_ebo_helper<0, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), false>::_M_tp’ invalidly declared function type
 1204 |       _Tp _M_tp{};
      |           ^~~~~
/usr/include/c++/12/bits/hashtable.h: In instantiation of ‘class std::_Hashtable<std::tuple<float, float, float>, std::tuple<float, float, float>, std::allocator<std::tuple<float, float, float> >, std::__detail::_Identity, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), std::hash<std::tuple<float, float, float> >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true> >’:
/usr/include/c++/12/bits/unordered_set.h:100:18:   required from ‘class std::unordered_set<std::tuple<float, float, float>, std::hash<std::tuple<float, float, float> >, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&)>’
unordered_set_triplet.cpp:21:26:   required from here
/usr/include/c++/12/bits/hashtable.h:665:7: error: function returning a function
  665 |       key_eq() const
      |       ^~~~~~
In file included from /usr/include/c++/12/unordered_set:47:
/usr/include/c++/12/bits/unordered_set.h: In instantiation of ‘class std::unordered_set<std::tuple<float, float, float>, std::hash<std::tuple<float, float, float> >, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&)>’:
unordered_set_triplet.cpp:21:26:   required from here
/usr/include/c++/12/bits/unordered_set.h:632:7: error: function returning a function
  632 |       key_eq() const
      |       ^~~~~~
unordered_set_triplet.cpp: In function ‘int main()’:
unordered_set_triplet.cpp:21:49: error: expected primary-expression before ‘,’ token
   21 |   unordered_set_triplet s(10, std::hash<triplet>, triplet_equal);
      |                                                 ^

How to fix this?

EDIT

Doesn't work either using (ordered) set:

>> cat set_triplet.cpp 
#include <iostream>
#include <set>
#include <tuple>
#include <limits> // numeric_limits
#include <cmath> // fabs
#include <functional> // hash

using triplet = std::tuple<float, float, float>;
bool triplet_equal(triplet const & lhs, triplet const & rhs) {
  float const eps = std::numeric_limits<float>::epsilon();
  if (std::fabs(std::get<0>(lhs) - std::get<0>(rhs)) > eps) return false;
  if (std::fabs(std::get<1>(lhs) - std::get<1>(rhs)) > eps) return false;
  if (std::fabs(std::get<2>(lhs) - std::get<2>(rhs)) > eps) return false;
  return true;
}
using set_triplet = std::set<triplet, std::hash<triplet>, decltype(triplet_equal)>;

int main() {
  //set_triplet s; // Compilation: KO...
  set_triplet s(10, std::hash<triplet>, triplet_equal);
  s.insert({1.f, 2.f, 3.f});
  s.insert({1.0000001f, 2.0000001f, 3.0000001f});
  for (auto const & t : s) std::cout << std::get<0>(t) << ", " << std::get<1>(t) << ", " << std::get<2>(t) << std::endl;
}

What container could be appropriate to use? triplet can be seen a 3D point (XYZ): I need to handle/detect duplicate points.

SOLUTION: USING SET (NOT UNORDERED SET) AND INT (NOT FLOAT) WITHOUT IMPLEMENTING LESS

Using tuples made of integer i built this way i = (int) 1000000 * f from float f and using set (as operator< will respect strict ordering up to 6-digit precision after multiplication by 1000000).

>> cat set_uint32_triplet.cpp 
#include <iostream>
#include <set>
#include <tuple>

using triplet_uint32 = std::tuple<uint32_t, uint32_t, uint32_t>;
using triplet_float = std::tuple<float, float, float>;
triplet_uint32 convert(triplet_float const & f) {
  uint32_t precision = 1000000; // Allow for 6-digit precision.
  uint32_t x = (uint32_t) (std::get<0>(f) * precision);
  uint32_t y = (uint32_t) (std::get<1>(f) * precision);
  uint32_t z = (uint32_t) (std::get<2>(f) * precision);
  return {x, y, z};
}

int main() {
  triplet_float pt1 = {1.f, 2.f, 3.f};
  triplet_float pt2 = {1.0000001f, 2.0000001f, 3.0000001f}; // Considered     duplicate with pt1.
  triplet_float pt3 = {1.000001f,  2.000001f,  3.000001f};  // Considered NOT duplicate with pt1.

  std::set<triplet_uint32> s;
  s.insert(convert(pt1));
  s.insert(convert(pt2));
  s.insert(convert(pt3));
  std::cout << "set size " << s.size() << std::endl;
  for (auto const & t : s) std::cout << "set item " << std::get<0>(t) << ", " << std::get<1>(t) << ", " << std::get<2>(t) << ", " << std::endl;
}

>> g++ -std=c++20 -o set_uint32_triplet set_uint32_triplet.cpp

>> ./set_uint32_triplet 
set size 2
set item 1000000, 2000000, 3000000, 
set item 1000000, 2000001, 3000001, 

SOLUTION: USING SET (NOT UNORDERED SET) AND INT (NOT FLOAT) WITH IMPLEMENTING LESS

>> cat set_uint32_triplet_less.cpp 
#include <iostream>
#include <set>
#include <tuple>

#define FLOAT_TO_INT(x) ((x)>=0?(int32_t)((x)+0.5):(int32_t)((x)-0.5))

using triplet_int32 = std::tuple<int32_t, int32_t, int32_t>;
using triplet_float = std::tuple<float, float, float>;
triplet_int32 convert(triplet_float const & f) {
  int32_t precision = 1000; // Allow for 3-digit precision.
  int32_t x = FLOAT_TO_INT(std::get<0>(f) * precision);
  int32_t y = FLOAT_TO_INT(std::get<1>(f) * precision);
  int32_t z = FLOAT_TO_INT(std::get<2>(f) * precision);
  //std::cout.precision(10);
  //std::cout << "convert: " << std::get<0>(f) << ", " << std::get<1>(f) << ", " << std::get<2>(f);
  //std::cout << " - " << std::get<0>(f) * precision << ", " << std::get<1>(f) * precision << ", " << std::get<2>(f) * precision;
  //std::cout << " - " << FLOAT_TO_INT(std::get<0>(f) * precision) << ", " << FLOAT_TO_INT(std::get<1>(f) * precision) << ", " << FLOAT_TO_INT(std::get<2>(f) * precision);
  //std::cout << " - " << x << ", " << y << ", " << z << std::endl;
  return {x, y, z};
}
struct less {
  bool operator()(triplet_int32 const & lhs,
                  triplet_int32 const & rhs) const {
    bool res = true; // lhs < rhs.
    if      (std::get<0>(lhs) >= std::get<0>(rhs)) res = false;
    else if (std::get<1>(lhs) >= std::get<1>(rhs)) res = false;
    else if (std::get<2>(lhs) >= std::get<2>(rhs)) res = false;
    //std::cout << "  less: " << std::get<0>(lhs) << ", " << std::get<1>(lhs) << ", " << std::get<2>(lhs);
    //std::cout <<    " - " << std::get<0>(rhs) << ", " << std::get<1>(rhs) << ", " << std::get<2>(rhs);
    //std::cout << " - res " << res << std::endl;
    return res; // lhs < rhs.
  }
};

int main() {
  triplet_float pt1 = {1.f, 2.f, 3.f};
  triplet_float pt2 = {1.0001f, 2.0001f, 3.0001f}; // Considered     duplicate with pt1.
  triplet_float pt3 = {1.001f,  2.001f,  3.001f};  // Considered NOT duplicate with pt1.

  std::set<triplet_int32, less> s;
  s.insert(convert(pt1));
  s.insert(convert(pt2));
  s.insert(convert(pt3));
  std::cout << "set size " << s.size() << std::endl;
  for (auto const & t : s) std::cout << "set item " << std::get<0>(t) << ", " << std::get<1>(t) << ", " << std::get<2>(t) << ", " << std::endl;
}
>> g++ -std=c++20 -o set_uint32_triplet_less set_uint32_triplet_less.cpp
>> ./set_uint32_triplet_less 
set size 2
set item 1000, 2000, 3000, 
set item 1001, 2001, 3001, 
2

There are 2 best solutions below

12
Jan Schultke On BEST ANSWER

Problems with KeyEqual

Firstly, the KeyEqual provided to the std::unordered_set can't be a function, and that's what you're trying to do with decltype(triplet_equal). However, it could be a function pointer. Normally, you should use a function object as follows:

struct triplet_equal {
    // note: static constexpr only since C++23, otherwise remove those two
    static constexpr bool operator()(triplet const & lhs, triplet const & rhs) const {
        float const eps = std::numeric_limits<float>::epsilon();
        if (std::fabs(std::get<0>(lhs) - std::get<0>(rhs)) > eps) return false;
        if (std::fabs(std::get<1>(lhs) - std::get<1>(rhs)) > eps) return false;
        if (std::fabs(std::get<2>(lhs) - std::get<2>(rhs)) > eps) return false;
        return true;
    }
};

// ...
std::unordered_set<triplet, std::hash<triplet>, triplet_equal> s(10);

You don't have to provide any value for the hash or for triplet_equal to the constructor because they are default-constructible.

Problems with Hash

The next huge issue is that the standard library has no std::hash specialization for tuples. Look into Generic hash for tuples in unordered_map / unordered_set if you want to make your own.

However, even if there was one, the problem remains that two equal tuples (where equality is determined by triplet_equal) must also have the same hash. You would have to specialize std::hash yourself so that two equal tuples always have the same hash, despite floating-point imprecision. You might be able to do it by quantizing floats, but I imagine it would be very difficult to do correctly.

Alternative: use std::set and provide a Compare

It would be much easier to use std::set, which only requires you to implement a less-than comparison:

// checks whether x < y after quantization to a multiple of epsilon
constexpr float eps_less_than(float x, float y) {
    constexpr float e = std::numeric_limits<float>::epsilon();
    // use simple comparison if numbers are far apart
    float d = x - y;
    if (std::fabs(d) >= 2 * e) {
        return d < 0;
    }
    return std::floor(x * (1 / e)) < std::floor(y * (1 / e));
}

// lexicographical comparison
struct triplet_less {
    // constexpr since C++23
    constexpr bool operator()(triplet const & lhs, triplet const & rhs) const {
        if (eps_less_than(std::get<0>(lhs), std::get<0>(rhs))) return true;
        if (eps_less_than(std::get<0>(rhs), std::get<0>(lhs))) return false;

        if (eps_less_than(std::get<1>(lhs), std::get<1>(rhs))) return true;
        if (eps_less_than(std::get<1>(rhs), std::get<1>(lhs))) return false;

        if (eps_less_than(std::get<2>(lhs), std::get<2>(rhs))) return true;
        return false;
    }
};

int main() {
    std::set<triplet, triplet_less> s;
    s.insert({1.f, 2.f, 3.f});
}

See live example at Compiler Explorer

Further Notes

It would be much better not to use std::tuple, but use a simple aggregate type as follows:

struct vec3 {
    float x, y, z;
    // C++20: default all comparison operators
    // (you still need a custom vec3_equal to deal with precision issues)
    friend auto constexpr operator<=>(vec3 const&, vec3 const&) = default;
};

With defaulted comparison operators, it's very easy to get all the functionality of std::tuple, and you can write lhs.x instead of needing std::get<0>(lhs) and other annoyances.

0
Jeffrey On

This is probably not the answer you want, but it offers you a solution to the transitivity issue of the accepted answer.

Use std::tuple<int, int, int> and quantize your floats by multiplying them by some constant (say 10000) and rounding them down. The scale factor will depend on your domain.

If you need to keep the original value, use an unordered_map. Use the int tuple for key and add a float tuple for the value. Then, when you search for a given triplet, you can look up the adjacents tuple in each of the three values.

That is more complex and more work, but it would be a more correct approach and avoid UB due to transitivity issues.