Published on

The Algorithm Design Manual (3rd) Exercise Solution 'E3-32'

Table of Contents

Introduction

This series is my solutions for algorithm exercises in'The Algorithm Design Manual' 3rd edition.
It's my solutions, not model answers, so if you find some mistakes or more sophisticated solutions, please post in comment area below.

Repository

Here

Exercise 3-32

Question

Implement versions of several different dictionary data structures, such as linked lists, binary trees, balanced binary search trees, and hash tables. Conduct experiments to assess the relative performance of these data structures in a simple application that reads a large text file and reports exactly one instance of each word that appears within it. This application can be efficiently implemented by maintaining a dictionary of all distinct words that have appeared thus far in the text and inserting/reporting each new word that appears in the stream. Write a brief report with your conclusions.

Solution

Outline

The implementation of singly linked list and binary search tree is simple and corresponds to those in textbook.

For the implementation of balanced binary search tree I use Red-Black-Tree, for the structure is typical and widespread.

For the implementation of hash table I use "Chaining" technique in textbook, in which my own implementation of singly linked list is used.

Codes

Singly Linked List
#ifndef impl_singly_linked_list
#define impl_singly_linked_list

#include <concepts>
#include <memory>

template <std::equality_comparable T>
struct node {
    T item;
    std::shared_ptr<node> next;
};

template <std::equality_comparable T>
class Singly_linked_list {
    using mnode = node<T>;

   private:
    std::shared_ptr<mnode> head;
    unsigned int m_size;

   public:
    Singly_linked_list() = default;
    Singly_linked_list(const Singly_linked_list&) = delete;
    Singly_linked_list(Singly_linked_list&&) = delete;
    Singly_linked_list(std::initializer_list<T> list)
        : m_size{static_cast<unsigned int>(list.size())} {
        if (!m_size) return;
        auto p_l{std::cbegin(list)};
        head = std::make_shared<mnode>(*p_l++);
        auto prev{head};
        for (; p_l != std::cend(list); p_l++) {
            auto p_node = std::make_shared<mnode>(*p_l);
            prev->next = p_node;
            prev = p_node;
        }
    }
    ~Singly_linked_list() {};
    Singly_linked_list& operator=(const Singly_linked_list&) = delete;

    unsigned int size() { return m_size; }

    std::shared_ptr<mnode> search(T val) {
        for (auto nd{head}; nd; nd = nd->next)
            if (nd->item == val) return nd;
        return nullptr;
    }

    void insert(T val) {
        head = std::make_shared<mnode>(val, head);
        ++m_size;
    }

    T head_val() { return head->item; }
    T tail_val() {
        std::shared_ptr<mnode> prev;
        for (auto nd{head}; nd; nd = nd->next) prev = nd;
        return prev->item;
    }

    template <std::regular_invocable<T> Func>
    void traverse(Func func) {
        std::shared_ptr<mnode> p_node;
        std::shared_ptr<mnode> next{head};
        while (next) {
            p_node = next;
            func(p_node->item);
            next = p_node->next;
        }
    }
};
#endif
Binary Search Tree
#ifndef impl_binary_search_tree
#define impl_binary_search_tree

#include <concepts>
#include <initializer_list>
#include <memory>

template <std::totally_ordered T>
class Binary_search_tree {
   public:
    struct tree {
        T item;
        std::shared_ptr<tree> parent;
        std::shared_ptr<tree> left;
        std::shared_ptr<tree> right;
    };
    using p_tree = std::shared_ptr<tree>;

   private:
    p_tree root;
    int size{0};

   public:
    Binary_search_tree(std::initializer_list<T> list) {
        for (auto &&e : list) insert_tree(e);
    };

    int get_size() const { return size; };

    template <std::regular_invocable<p_tree> Func>
    void traverse(Func func) {
        const auto helper{[&](auto helper, const p_tree tree) -> void {
            if (tree) {
                helper(helper, tree->left);
                func(tree);
                helper(helper, tree->right);
            }
        }};
        helper(helper, root);
    }

    void insert_tree(T val) {
        auto helper{[&](auto helper, p_tree parent, bool is_min) -> void {
            p_tree *p_tree;
            if (val < parent->item)
                p_tree = &(parent->left);
            else if (val == parent->item)
                return;
            else {
                p_tree = &(parent->right);
                is_min = false;
            }
            if (!(*p_tree)) {
                *p_tree = std::make_shared<tree>(val, parent, nullptr, nullptr);
                return;
            } else {
                helper(helper, *p_tree, is_min);
            }
        }};
        if (!root) {
            root = std::make_shared<tree>(val, nullptr, nullptr, nullptr);
        } else
            helper(helper, root, true);
        ++size;
    }
};

#endif
Balanced Binary Search Tree
#ifndef impl_red_black_tree
#define impl_red_black_tree

#include <concepts>

using namespace std;

template <typename T>
struct Node {
    Node<T> *parent;
    Node<T> *left;
    Node<T> *right;
    int color;
    T data;
};

template <typename T>
using pNode = Node<T> *;

template <typename T>
class RedBlackTree {
    using NodePtr = pNode<T>;

   private:
    NodePtr root;
    NodePtr TNULL;

    void initializeNULLNode(NodePtr node, NodePtr parent) {
        node->parent = parent;
        node->left = nullptr;
        node->right = nullptr;
        node->color = 0;
    }

    NodePtr searchTreeHelper(NodePtr node, int key) {
        if (node == TNULL || key == node->data) {
            return node;
        }

        if (key < node->data) {
            return searchTreeHelper(node->left, key);
        }
        return searchTreeHelper(node->right, key);
    }

    // For balancing the tree after insertion
    void insertFix(NodePtr k) {
        NodePtr u;
        while (k->parent->color == 1) {
            if (k->parent == k->parent->parent->right) {
                u = k->parent->parent->left;
                if (u->color == 1) {
                    u->color = 0;
                    k->parent->color = 0;
                    k->parent->parent->color = 1;
                    k = k->parent->parent;
                } else {
                    if (k == k->parent->left) {
                        k = k->parent;
                        rightRotate(k);
                    }
                    k->parent->color = 0;
                    k->parent->parent->color = 1;
                    leftRotate(k->parent->parent);
                }
            } else {
                u = k->parent->parent->right;

                if (u->color == 1) {
                    u->color = 0;
                    k->parent->color = 0;
                    k->parent->parent->color = 1;
                    k = k->parent->parent;
                } else {
                    if (k == k->parent->right) {
                        k = k->parent;
                        leftRotate(k);
                    }
                    k->parent->color = 0;
                    k->parent->parent->color = 1;
                    rightRotate(k->parent->parent);
                }
            }
            if (k == root) {
                break;
            }
        }
        root->color = 0;
    }

   public:
    RedBlackTree() {
        TNULL = new Node<T>;
        TNULL->color = 0;
        TNULL->left = nullptr;
        TNULL->right = nullptr;
        root = TNULL;
    }

    NodePtr searchTree(int k) { return searchTreeHelper(this->root, k); }

    NodePtr minimum(NodePtr node) {
        while (node->left != TNULL) {
            node = node->left;
        }
        return node;
    }

    NodePtr maximum(NodePtr node) {
        while (node->right != TNULL) {
            node = node->right;
        }
        return node;
    }

    NodePtr successor(NodePtr x) {
        if (x->right != TNULL) {
            return minimum(x->right);
        }

        NodePtr y = x->parent;
        while (y != TNULL && x == y->right) {
            x = y;
            y = y->parent;
        }
        return y;
    }

    NodePtr predecessor(NodePtr x) {
        if (x->left != TNULL) {
            return maximum(x->left);
        }

        NodePtr y = x->parent;
        while (y != TNULL && x == y->left) {
            x = y;
            y = y->parent;
        }

        return y;
    }

    void leftRotate(NodePtr x) {
        NodePtr y = x->right;
        x->right = y->left;
        if (y->left != TNULL) {
            y->left->parent = x;
        }
        y->parent = x->parent;
        if (x->parent == nullptr) {
            this->root = y;
        } else if (x == x->parent->left) {
            x->parent->left = y;
        } else {
            x->parent->right = y;
        }
        y->left = x;
        x->parent = y;
    }

    void rightRotate(NodePtr x) {
        NodePtr y = x->left;
        x->left = y->right;
        if (y->right != TNULL) {
            y->right->parent = x;
        }
        y->parent = x->parent;
        if (x->parent == nullptr) {
            this->root = y;
        } else if (x == x->parent->right) {
            x->parent->right = y;
        } else {
            x->parent->left = y;
        }
        y->right = x;
        x->parent = y;
    }

    // Inserting a node
    void insert(T key) {
        NodePtr y = nullptr;
        NodePtr x = this->root;

        while (x != TNULL) {
            y = x;
            if (key < x->data) {
                x = x->left;
            } else if (key == x->data) {
                return;
            } else {
                x = x->right;
            }
        }

        NodePtr node = new Node<T>;
        node->parent = nullptr;
        node->data = key;
        node->left = TNULL;
        node->right = TNULL;
        node->color = 1;
        node->parent = y;

        if (y == nullptr) {
            root = node;
        } else if (node->data < y->data) {
            y->left = node;
        } else {
            y->right = node;
        }

        if (node->parent == nullptr) {
            node->color = 0;
            return;
        }

        if (node->parent->parent == nullptr) {
            return;
        }

        insertFix(node);
    }

    NodePtr getRoot() { return this->root; }

    template <std::regular_invocable<NodePtr> Func>
    void traverse(Func func) const {
        const auto helper{[&func](auto helper, const NodePtr node) -> void {
            if (node) {
                helper(helper, node->left);
                func(node);
                helper(helper, node->right);
            }
        }};
        helper(helper, root);
    }
};

#endif
Hash Table
#ifndef impl_hash_table
#define impl_hash_table

#include <algorithm>
#include <concepts>
#include <vector>

#include "./singly-linked-list.hpp"

constexpr size_t T_SIZE{1 << 12};

template <class T>
class Hash_table {
   private:
    std::vector<Singly_linked_list<T>> table;
    std::vector<Singly_linked_list<T> *> refs;

   public:
    Hash_table() : table(T_SIZE) {}
    Hash_table(std::initializer_list<T> lst) : Hash_table() {
        for (auto &&e : lst) {
            insert(e);
        }
    }
    void insert(T e) {
        std::hash<T> hasher{};
        auto hash(hasher(e));
        auto &lst{table[hash % T_SIZE]};
        auto it{lst.search(e)};
        if (!it) {
            if (!lst.size()) refs.push_back(&lst);
            lst.insert(e);
        }
    }
    void insert(std::vector<T> &v) {
        for (auto &&e : v) insert(e);
    }

    template <std::regular_invocable<T> Func>
    void traverse(Func func) {
        for (const auto lst : refs) {
            lst->traverse(func);
        }
    }
};

#endif
TIme Measurement Routine
#include <chrono>
#include <concepts>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "./binary-search-tree.hpp"
#include "./hash-table.hpp"
#include "./red-black-tree.hpp"
#include "./singly-linked-list.hpp"

using namespace std::string_literals;

class Base_dic {
   protected:
    std::string output_name;
    void print_s(std::ostream& o, std::string_view s) { o << s << std::endl; }

   public:
    Base_dic(std::string_view sv) : output_name{sv} {}
    virtual ~Base_dic() = default;
    virtual void insert(std::string& s) = 0;
    virtual void print_all(std::ofstream& outf) = 0;
    std::string_view get_output_name() const { return output_name; }
};

class SLL_dic : public Base_dic {
   private:
    Singly_linked_list<std::string> lst{};

   public:
    virtual ~SLL_dic() = default;
    SLL_dic() : Base_dic("singly-linked-list-result") {}
    void insert(std::string& s) override {
        auto pnode{lst.search(s)};
        if (!pnode) lst.insert(s);
    }
    void print_all(std::ofstream& outf) {
        lst.traverse([&](std::string e) { print_s(outf, e); });
    };
};

class BST_dic : public Base_dic {
   private:
    Binary_search_tree<std::string> bst{};

   public:
    virtual ~BST_dic() = default;
    BST_dic() : Base_dic("binary-serach-tree-result") {}
    void insert(std::string& s) override { bst.insert_tree(s); }
    void print_all(std::ofstream& outf) {
        bst.traverse([&](Binary_search_tree<std::string>::p_tree p_tree) {
            print_s(outf, p_tree->item);
        });
    };
};

class RBT_dic : public Base_dic {
   private:
    RedBlackTree<std::string> bst{};

   public:
    virtual ~RBT_dic() = default;
    RBT_dic() : Base_dic("red-black-tree-result") {}
    void insert(std::string& s) override { bst.insert(s); }
    void print_all(std::ofstream& outf) {
        bst.traverse([&](pNode<std::string> p_node) {
            if (p_node->data.length()) print_s(outf, p_node->data);
        });
    };
};

class HT_dic : public Base_dic {
   private:
    Hash_table<std::string> bst{};

   public:
    virtual ~HT_dic() = default;
    HT_dic() : Base_dic("hash-table-result") {}
    void insert(std::string& s) override { bst.insert(s); }
    void print_all(std::ofstream& outf) {
        bst.traverse([&](std::string s) { print_s(outf, s); });
    };
};

template <typename Dic>
    requires std::derived_from<Dic, Base_dic>
void count_words(std::ifstream& inf, Dic& dic) {
    std::string s;
    while (inf >> s) {
        dic.insert(s);
    }

    std::ostringstream name;
    name << "./"s << dic.get_output_name() << ".txt"s;
    std::ofstream outf{name.str()};
    dic.print_all(outf);
}

template <std::regular_invocable<> Func>
void measure_time(std::ofstream& outf, Func func,
                  std::string_view target_name) {
    auto start = chrono::high_resolution_clock::now();
    auto erapsed{
        [&]() { return chrono::high_resolution_clock::now() - start; }};

    auto get_milli_ticks{[](const std::chrono::nanoseconds& d) {
        return chrono::duration_cast<chrono::milliseconds>(d).count();
    }};

    func();

    outf << "Erapsed time for " << target_name << ":"
         << get_milli_ticks(erapsed()) << " milliseconds." << std::endl;
}

int main(int argc, char const* argv[]) {
    std::vector<std::shared_ptr<Base_dic>> vdic;
    vdic.push_back(std::shared_ptr<Base_dic>(new SLL_dic{}));
    vdic.push_back(std::shared_ptr<Base_dic>(new BST_dic{}));
    vdic.push_back(std::shared_ptr<Base_dic>(new RBT_dic{}));
    vdic.push_back(std::shared_ptr<Base_dic>(new HT_dic{}));

    std::ifstream inf{"./some-text.txt"};
    if (!inf) {
        std::cerr << "cannot open file" << std::endl;
        return 1;
    }
    std::ofstream outf{"./result-time.txt"};
    if (!outf) {
        std::cerr << "cannot operate file" << std::endl;
        return 1;
    }

    outf.imbue(std::locale("en_US.UTF-8"));

    for (auto&& dic : vdic) {
        auto count_words_for{[&]() { count_words(inf, *dic); }};
        measure_time(outf, count_words_for, dic->get_output_name());
        inf.clear();
        inf.seekg(0);
    }

    return 0;
}

Summary of Result

Each of Erapsed time is:

Erapsed time for singly-linked-list-result  :5,243 milliseconds.
Erapsed time for binary-serach-tree-result  :78 milliseconds.
Erapsed time for red-black-tree-result      :39 milliseconds.
Erapsed time for hash-table-result          :32 milliseconds.

As we expected, the time of singly linked list (O(n2)O(n^2)) is much larger than others.
Balance of binary tree doesn't improve the time in the order level, but improves anyway.
Hash table is the most suitable with respect to time, though there is no significant difference between the bottom two numbers in the list above by the data I used.