Intrusive, circular, branchless, doubly linked list - how to have the list to identify the node member fields?

https://stackoverflow.com/questions/22256092

11-06-2023
|

Pergunta

The code is posted here: https://ideone.com/ul2PiS

What I'm trying to do is allow the user to specify the list nodes as member fields of the classes which will be added to the lists. Currently, this is done with a macro using offsetof() which means that the member nodes have to be public. Ideally, I'd like to be able to somehow specify, as part of each linked_list declaration, as a template parameter, which member field should be used as the node. boost::intrusive seems to manage it, but I can't quite follow how they do it.

It's a requirement that the list functions be branchless (no pointer comparisons, the platform has a very very slow branch mechanism) and that the list be intrusive and allow objects to be members of multiple lists simultaneously.

//////////////////////////////////////////////////////////////////////
// linked_list.h

#pragma once

//////////////////////////////////////////////////////////////////////

#include <cstddef>
#include <functional>

//////////////////////////////////////////////////////////////////////

struct list_node
{
    list_node *next;
    list_node *prev;
};

//////////////////////////////////////////////////////////////////////

template<typename T, size_t off> struct linked_list
{
    //////////////////////////////////////////////////////////////////////

    static list_node const *get_node(T const *o)
    {
        return reinterpret_cast<list_node const *>(reinterpret_cast<char const *>(o) + off);
    }

    //////////////////////////////////////////////////////////////////////

    static list_node *get_node(T *o)
    {
        return reinterpret_cast<list_node *>(reinterpret_cast<char *>(o) + off);
    }

    //////////////////////////////////////////////////////////////////////

    static T const *get_object(list_node const *node)
    {
        return reinterpret_cast<T const *>(reinterpret_cast<char const *>(node) - off);
    }

    //////////////////////////////////////////////////////////////////////

    static T *get_object(list_node *node)
    {
        return reinterpret_cast<T *>(reinterpret_cast<char *>(node) - off);
    }

    //////////////////////////////////////////////////////////////////////

    list_node root;

    //////////////////////////////////////////////////////////////////////

    linked_list()
    {
        root.next = &root;
        root.prev = &root;
    }

    //////////////////////////////////////////////////////////////////////

    void push_front(T *obj)
    {
        list_node *node = get_node(obj);
        root.next->prev = node;
        node->next = root.next;
        node->prev = &root;
        root.next = node;
    }

    //////////////////////////////////////////////////////////////////////

    void push_front(T &obj)
    {
        list_node *node = get_node(&obj);
        root.next->prev = node;
        node->next = root.next;
        node->prev = &root;
        root.next = node;
    }

    //////////////////////////////////////////////////////////////////////

    void push_back(T *obj)
    {
        list_node *node = get_node(obj);
        node->prev = root.prev;
        node->next = &root;
        root.prev->next = node;
        root.prev = node;
    }

    //////////////////////////////////////////////////////////////////////

    void push_back(T &obj)
    {
        list_node *node = get_node(&obj);
        node->prev = root.prev;
        node->next = &root;
        root.prev->next = node;
        root.prev = node;
    }

    //////////////////////////////////////////////////////////////////////

    void insert_before(T *pos, T *obj)
    {
        list_node *node = get_node(obj);
        list_node *n = get_node(pos);
        n->prev->next = node;
        node->prev = n->prev;
        n->prev = node;
        node->next = n;
    }

    //////////////////////////////////////////////////////////////////////

    void insert_before(T &pos, T &obj)
    {
        list_node *node = get_node(&obj);
        list_node *n = get_node(&pos);
        n->prev->next = node;
        node->prev = n->prev;
        n->prev = node;
        node->next = n;
    }

    //////////////////////////////////////////////////////////////////////

    void insert_after(T *pos, T *obj)
    {
        list_node *node = get_node(obj);
        list_node *n = get_node(pos);
        n->next->prev = node;
        node->next = n->next;
        n->next = node;
        node->prev = n;
    }

    //////////////////////////////////////////////////////////////////////

    void insert_after(T &pos, T &obj)
    {
        list_node *node = get_node(&obj);
        list_node *n = get_node(&pos);
        n->next->prev = node;
        node->next = n->next;
        n->next = node;
        node->prev = n;
    }

    //////////////////////////////////////////////////////////////////////

    void remove(T *obj)
    {
        list_node *node = get_node(obj);
        node->prev->next = node->next;
        node->next->prev = node->prev;
    }

    //////////////////////////////////////////////////////////////////////

    void remove(T &obj)
    {
        list_node *node = get_node(&obj);
        node->prev->next = node->next;
        node->next->prev = node->prev;
    }

    //////////////////////////////////////////////////////////////////////

    T *pop_back()
    {
        list_node *node = root.prev;
        node->prev->next = node->next;
        node->next->prev = node->prev;
        return get_object(node);
    }

    //////////////////////////////////////////////////////////////////////

    T *pop_front()
    {
        list_node *node = root.next;
        node->next->prev = node->prev;
        node->prev->next = node->next;
        return get_object(node);
    }

    //////////////////////////////////////////////////////////////////////

    bool empty() const
    {
        return root.next == &root;
    }

    //////////////////////////////////////////////////////////////////////

    void clear()
    {
        root.next = root.prev = &root;
    }

    //////////////////////////////////////////////////////////////////////

    T *head() const
    {
        return get_object(root.next);
    }

    //////////////////////////////////////////////////////////////////////

    T *tail() const
    {
        return get_object(root.prev);
    }

    //////////////////////////////////////////////////////////////////////

    T const *end()
    {
        return get_object(&root);
    }

    //////////////////////////////////////////////////////////////////////

    T *next(T *i) const
    {
        return get_object(get_node(i)->next);
    }

    //////////////////////////////////////////////////////////////////////

    T *prev(T *i) const
    {
        return get_object(get_node(i)->prev);
    }

    //////////////////////////////////////////////////////////////////////

    bool for_each(std::function<bool (T *)> func)
    {
        for(T *i = head(); i != end(); i = next(i))
        {
            if(!func(i))
            {
                return false;
            }
        }
        return true;
    }

    //////////////////////////////////////////////////////////////////////

    T *find(std::function<bool (T *)> func)
    {
        for(T *i = head(); i != end(); i = next(i))
        {
            if(func(i))
            {
                return i;
            }
        }
        return nullptr;
    }

    //////////////////////////////////////////////////////////////////////

};

//////////////////////////////////////////////////////////////////////

// Yuck:

#define declare_linked_list(type_name, node_name) \
    linked_list<type_name, offsetof(type_name, node_name)>

#define typedef_linked_list(type_name, node_name) \
    typedef declare_linked_list(type_name, node_name)

And the client code looks like:

//////////////////////////////////////////////////////////////////////
// main.cpp

#include <stdio.h>
#include <random>
#include "linked_list.h"

struct foo
{
    foo() : i(rand() % 10) { }

    int i;

    list_node node1;    // would like it if these could be made private
    list_node node2;    // but the nasty macros need to see inside...
    list_node node3;    // getting rid of the macros would be even better
};

// None of these 3 options are very nice:

// 1. declare a list with the macro
declare_linked_list(foo, node1) list1;

// 2. or via a typedef
typedef_linked_list(foo, node2) list2_t;
list2_t list2;

// 3. or very wordy non-macro declaration
linked_list<foo, offsetof(foo, node3)> list3;

int main(int, char **)
{
    printf("Begin\n");

    foo foos[10];

    for(int i=0; i<10; ++i)
    {
        list1.push_back(foos[i]);
        list2.push_back(foos[i]);
        list3.push_back(foos[i]);
    }

    int sum = 0;
    int n = 0;
    // but this for loop is clear and readable and has very low overhead
    for(foo *i = list1.head(); i != list1.end(); i = list1.next(i))
    {
        sum += i->i;
    }
    printf("Total: %d\n", sum);

    list2.remove(foos[2]);

    n = 0;
    sum = 0;
    for(foo *i = list2.head(); i != list2.end(); i = list2.next(i))
    {
        sum += i->i;
    }
    printf("Total2: %d\n", sum);

    getchar();
    return 0;
}

[EDIT] OK, with pointer to member template parameter it's much nicer:

template<typename T, list_node T::* node> struct linked_list
{
    //////////////////////////////////////////////////////////////////////

    static list_node const *get_node(T const *o)
    {
        return reinterpret_cast<list_node const *>
            (reinterpret_cast<char const *>(o) + offsetof(T, *node));
    }

    //////////////////////////////////////////////////////////////////////

    static list_node *get_node(T *o)
    {
        return reinterpret_cast<list_node *>
            (reinterpret_cast<char *>(o) + offsetof(T, *node));
    }

    //////////////////////////////////////////////////////////////////////

    static T const *get_object(list_node const *n)
    {
        return reinterpret_cast<T const *>
            (reinterpret_cast<char const *>(n) - offsetof(T, *node));
    }

    //////////////////////////////////////////////////////////////////////

    static T *get_object(list_node *n)
    {
        return reinterpret_cast<T *>
            (reinterpret_cast<char *>(n) - offsetof(T, *node));
    }

so you can declare lists like:

linked_list<foo, &foo::node1> list1;

so we (mostly) get rid of the ugly declaration syntax, although I still can't find a way to allow them to be private. It looks like boost::intrusive requires them to be public as well.

Solução

Here is a bit of a hack that will allow bijective mapping between a member and the struct, doing mere pointer arithmetic on constants at run time. static_assert( std::is_pod<T>::value, "POD needed!" ); might be a good idea:

template<typename T, typename N, N T::* member>
struct member_access {
  static N& ContainerToMember( T& t ) { return t.*member; }
  static N const& ContainerToMember( T const& t ) { return t.*member; }
  template<T* ptr=nullptr>
  static constexpr std::size_t offset_of() {
return reinterpret_cast<std::size_t>(reinterpret_cast<char*>(&((ptr)->*member)));
  }
  static T& MemberToContainer( N& n ) {
    return *reinterpret_cast<T*>(reinterpret_cast<char*>(&n)-offset_of());
  }
  static T const& MemberToContainer( N const& n ) {
    return *reinterpret_cast<T const*>(reinterpret_cast<char const*>(&n)-offset_of());
  }
};

which gets rid of your offsetof requirement.

Privacy is hard. There is the explicit specialization side effect hole in the privacy type system, but it results in runtime member pointers, and we want compile time values.

A better approach might be to stuff the linked lists into some metaprogramming:

template< typename T, unsigned index >
struct list_node:list_node<T, index-1> {
  list_node* next;
  list_node* prev;
  T* self() { static_cast<T*>(this); }
  T const* self() const { static_cast<T const*>(this); }
};
template<typename T>
struct list_node<T, 0> {
  list_node* next;
  list_node* prev;
  T* self() { static_cast<T*>(this); }
  T const* self() const { static_cast<T const*>(this); }
};
template<typename T, unsigned N>
struct listable : list_node<T, N-1> {};
template<typename T>
struct listable<T, 0> {};

then simply create your listable data like:

struct data: listable<data, 10> {
  std::string s;
};

We can then do this:

template<unsigned N, typename T>
list_node<T, N>& get_node( T& t ) { return t; }
template<unsigned N, typename T>
list_node<T, N> const& get_node( T const& t ) { return t; }
template<unsigned N, typename T>
T& get_data( list_node<T, N>& n ) { return *n.self(); }
template<unsigned N, typename T>
T const& get_data( list_node<T, N> const& n ) { return *n.self(); }

which is all legal C++11.

template<typename T, unsigned index>
struct linked_list {
  typedef list_node<T, index> node;
};

which is sort of nice. Downside: the list_node for each linked_list is a different type. Getting around that is a bit tricky.

We could stay in the realm of defined behavior by pushing all of the next and prev pointers to a POD array beyond the root of the topmost list_node. We then work with pointers into that. We can legally do pointer arithmetic within that array, and a pointer to the first element of the most ancestral struct can be legally reinterpreted to being a pointer to that struct.

From that, we can static_cast down to a T* directly (which is probably no binary change in the pointer value at all).

struct link { link* next; link* prev; };

template<typename T, unsigned N>
struct listable {
  std::array<link, N> links;
  static listable* array_to_list( std::array<link, N>* a ) { return reinterpret_cast<listable>(a); }
  template<unsigned I>
  static listable* link_to_list( link* l ) {
    return array_to_list( reinterpret_cast<std::array<link, N>>(l - I) );
  }
  template<unsigned I>
  link* get_link() { return &links[I]; }
  T* listable_to_T() {
    static_assert( std::is_base_of< listable, T >::value, "CRTP vailure" );
    return static_cast<T*>(this);
  }
  template<unsigned I>
  static T* link_to_T(link* l) {
    return link_to_list<I>(l)->listable_to_T()
  }
};

which at most requests that a pointer-to-first-element and a pointer-to-array be layout compatible (I hope so!)

Again, under this model, your link listable data does:

struct Bob : listable<Bob, 10> {};

to say that there are 10 embedded lists in Bob. Converting from a Bob to a link* involves invoking bob.get_link<5>(), while converting from a link* to a Bob* is listable<Bob, 10>::link_to_T<5>( l ), assuming we are talking about sub-list 5.

Licenciado em: CC-BY-SA com atribuição

Não afiliado a StackOverflow