基本数据结构介绍及其C++实现(上)

  • 时间:
  • 浏览:3
  • 来源:跟我学网络

数组、链表、队列、栈、树、图是最基本的数据结构,其中“数组、链表、队列、栈”属于线性结构,每个节点只有一个前节点和后节点(若不是循环线性结构,头节点没有前节点,尾节点没有后节点),“树、图”是非线性结构,树中的节点只有一个父节点(根节点没有父节点),可以有若干个子节点,图中的节点可以若干个连通节点,从“线性”到“非线性”是数据组织方式的一种突破,”思考方式的突破“则带来了生产力的极大提高,了解其实现逻辑和常用的算法对日常的开发工作非常有帮助。

下面代码在C++11环境下编译通过并经过了测试验证(g++ -std=c++11 xx, xx代表文件名),因采用模板的方式,所以类的实现都在头文件中了。整个内容因代码原因排版较长,现将本篇文章的内容列举出来:

  • 数组(Array)
  • 单向链表
  • 双向链表
  • 循环链表
  • 栈:1)用数组实现的栈;2)用单链表实现的栈;
  • 单向队列:1)用循环数组实现的单向队列;2)用循环链表实现的单向队列
  • 双向队列(用双向链表实现)
  • 树的重要概念及各个类型的树的相关特性
  • 二叉堆树的实现(完全二叉树的特例,底层用数组实现)
  • 优先级队列
  • HashTable
  1. 数组(Array)

采用数组结构来实现游戏分数Top N排名功能

// GameEntry.hpp
_Pragma("once")
#include <string>

class GameEntry { // a game score entry
    public:
        GameEntry(const std::string& n="", int s=0); // constructor
        std::string getName() const; // get player name
        int getScore() const;// get score
    private:
        std::string name; // player’s name
         int score; // player’s score
};

//GameEntry.cpp
#include "GameEntry.hpp"

GameEntry::GameEntry(const std::string& n, int s) // constructor
    : name(n), score(s)
{
}

std::string GameEntry::getName() const
{
    return name;
}

int GameEntry::getScore() const
{
    return score;
}

// Scores.hpp
_Pragma("once")
#include "GameEntry.hpp"

class Scores
{
    public:
        Scores(int maxEnt = 10);
        Scores(const Scores& s);
        Scores(Scores&& s);
        Scores& operator = (const Scores& s);
        ~Scores();
        void add (const GameEntry&e);
        GameEntry remove(int i) noexcept(false);
        void display()const;
    private:
        void init(const Scores& s);
    private:
        int maxEntries;
        int numEntries;
        GameEntry* entries;
};

// Scores.cpp
#include "Scores.hpp"
#include <stdexcept>
#include <iostream>

Scores::Scores(int maxEnt)
{
    maxEntries = maxEnt;
    entries = new GameEntry[maxEntries];
    numEntries = 0;
}

void Scores::init(const Scores& s)
{
    maxEntries = s.maxEntries;
    numEntries = s.numEntries;
    entries = new GameEntry[maxEntries];
    for(int i = 0; i < numEntries; ++i)
    {
        entries[i] = s.entries[i];
    }
}

Scores::Scores(const Scores& s)
{
    init(s);
}

Scores::Scores(Scores&& s)
{
    maxEntries = s.maxEntries;
    numEntries = s.numEntries;
    entries = s.entries;
    s.entries = nullptr;
}

Scores& Scores::operator = (const Scores& s)
{
    if(this == &s)
    {
        return *this;
    }
    if(entries != nullptr)
    {
        delete []entries;
    }
    init(s);
    return *this;
}

Scores::~Scores()
{
    if(entries != nullptr)
    {
        delete []entries;
    }
}

void Scores::add(const GameEntry& e)
{
    int newScore = e.getScore();
    if(numEntries == maxEntries)
    {
        if(newScore <= entries[maxEntries - 1].getScore())
        {
            return;
        }
    }
    else
    {
        ++numEntries;
    }

    int i = numEntries - 2;
    while(i >= 0 && newScore > entries[i].getScore())
    {
        entries[i + 1] = entries[i];
        --i;
    }
    entries[i + 1] = e;
}

GameEntry Scores::remove(int i) noexcept(false)
{
    if((i < 0) || (i >= numEntries))
    {
        throw std::logic_error("Invalid Index");
    }
    GameEntry e = entries[i];
    for(int j = i + 1; j < numEntries; ++j)
    {
        entries[j - 1] = entries[j];
    }
    --numEntries;
    return e;
}

void Scores::display()const
{
    for(int i = 0; i < numEntries; ++i)
    {
        std::cout << "(" << entries[i].getName() << "," << entries[i].getScore() << ")" << ",";
    }
    std::cout << std::endl;
}

2. 单向链表

// SLinkedList.hpp
_Pragma("once")

#include <iostream>

template <typename E>
class SLinkedList;

template <typename E>
class SNode
{
    private:
        E elem;
        SNode<E>* next;
    friend SLinkedList<E>;
};


template <typename E>
class SLinkedList
{
    public:
        SLinkedList()
        {
            header = new SNode<E>;
            header->next = nullptr;
        }

        ~SLinkedList()
        {
            while(!empty())
            {
                removeFront();
            }
            delete header;
        }

        void init(const SLinkedList& s)
        {
            header = new SNode<E>;
            header->next = nullptr;
            SNode<E>* p = s.header->next;
            SNode<E> * q = header;
            while(p != nullptr)
            {
                SNode<E>* v = new SNode<E>;
                v->elem = p->elem;
                v->next = nullptr;
                p = p->next;
                q->next = v;
                q = q->next;
            }
        }

        SLinkedList(const SLinkedList& s)
        {
            std::cout << "SLinkedList(const SLinkedList& s)" << std::endl;
            init(s);
        }

        SLinkedList& operator = (const SLinkedList& s)
        {
            std::cout << "SLinkedList& operator = (const SLinkedList& s)" << std::endl;
            if(this == &s)
            {
                return *this;
            }
            while(!empty())
            {
                removeFront();
            }
            delete header;

            init(s);

            return *this;
        }

        SLinkedList(SLinkedList&& s)
        {
            std::cout << "SLinkedList::SLinkedList(SLinkedList&& s)" << std::endl;
            header = new SNode<E>;
            header->next = s.header->next;
            s.header->next = nullptr;
        }

        bool empty() const
        {
            return (nullptr == header->next);
        }

        const E& front() const
        {
            if(empty())
            {
                throw std::logic_error("SLinkedList empty");
            }
            return header->next->elem;
        }

        void addFront(const E& e)
        {
            SNode<E>* v = new SNode<E>;
            v->elem = e;
            v->next = header->next;
            header->next = v;
        }

        void removeFront()
        {
            if(empty())
            {
                return ;
            }
            SNode<E>* old = header->next;
            header->next = old->next;
            delete old;
        }

        void reserve()
        {
            if(empty())
            {
                return;
            }
            SNode<E> * p = header->next;
            SNode<E> * pn = NULL;
            SNode<E> * pnn = NULL;
            if(NULL == p->next )
            {
                return;
            }
            else
            {
                pn = p->next;
            }
            while(pn != NULL)
            {
                pnn = pn->next;
                pn->next = p;
                p = pn;
                pn = pnn;
            }
            header->next->next = NULL;
            header->next = p;
        }

        void display()const
        {
            if(empty())
            {
                return;
            }
            SNode<E> * p = header->next;
            while(p != NULL)
            {
                std::cout << p->elem << ", ";
                p = p ->next;
            }
            std::cout << std::endl;
        }
    private:
        SNode<E>* header;
};

3. 双向链表

// DLinkedList.hpp
_Pragma("once")

#include <iostream>

template <typename E>
class DLinkedList;

template <typename E>
class DNode
{
    private:
        E elem;
        DNode<E>* prev;
        DNode<E>* next;
    friend DLinkedList<E>;
};


template <typename E>
class DLinkedList
{
    public:
        DLinkedList()
        {
            header = new DNode<E>;
            tailer = new DNode<E>;
            header->next = tailer;
            tailer->prev = header;
        }

        ~DLinkedList()
        {
            while(!empty())
            {
                removeFront();
            }
            delete header;
            delete tailer;
        }

        void init(const DLinkedList& d)
        {
            header = new DNode<E>;
            tailer = new DNode<E>;
            header->next = tailer;
            tailer->prev = header;

            DNode<E>* p = d.header->next;
            while(p != d.tailer)
            {
                add(tailer, p->elem);
                p = p->next;
            }
        }

        DLinkedList(const DLinkedList& d)
        {
            init(d);
        }

        DLinkedList& operator = (const DLinkedList& d)
        {
            if(this == &d)
            {
                return *this;
            }
            while(!empty())
            {
                removeFront();
            }
            delete header;
            delete tailer;
            init(d);
            return *this;
        }

        DLinkedList(DLinkedList&& d)
        {
            header = new DNode<E>;
            tailer = new DNode<E>;

            header->next = d.header->next;
            d.header->next->prev = header;
            tailer->prev = d.tailer->prev;
            d.tailer->prev->next = tailer;

            d.header->next = d.tailer;
            d.tailer->prev = d.header;
        }

        bool empty() const
        {
            return (header->next == tailer);
        }

        const E& front() const
        {
            if(empty())
            {
                throw std::logic_error("DLinkedList empty");
            }
            return header->next->elem;
        }

        const E& back() const
        {
            if(empty())
            {
                throw std::logic_error("DLinkedList empty");
            }
            return tailer->prev->elem;
        }

        void addFront(const E& e)
        {
            add(header->next, e);
        }

        void addBack(const E& e)
        {
            add(tailer, e);
        }

        void removeFront()
        {
            if(empty())
            {
                throw std::logic_error("DLinkedList empty");
            }
            remove(header->next);
        }

        void removeBack()
        {
            if(empty())
            {
                throw std::logic_error("DLinkedList empty");
            }
            remove(tailer->prev);
        }

        void display()const
        {
            DNode<E>* p = header->next;
            while(p != tailer)
            {
                std::cout << p->elem << ",";
                p = p ->next;
            }
            std::cout << std::endl;
        }

    protected:
        // insert new node before v
        void add(DNode<E>* v, const E& e)
        {
            DNode<E>* u = new DNode<E>;
            u->elem = e;

            u->next = v;
            u->prev = v->prev;
            v->prev->next = u;
            v->prev = u;
        }

        void remove(DNode<E>* v)
        {
            v->prev->next = v->next;
            v->next->prev = v->prev;
            delete v;
        }

    private:
        DNode<E>* header;
        DNode<E>* tailer;
};

4. 循环链表

// CLinkedList.hpp
_Pragma("once")

#include <iostream>

template <typename E>
class CLinkedList;

template <typename E>
class CNode
{
    private:
        E elem;
        CNode<E>* next;
    friend CLinkedList<E>;
};


template <typename E>
class CLinkedList
{
    public:
        CLinkedList()
            :cursor(nullptr)
        {
        }

        ~CLinkedList()
        {
            while(!empty())
            {
                remove();
            }
        }

        void init(const CLinkedList& c)
        {
            cursor = nullptr;
            if(c.empty())
            {
                return;
            }
            cursor = new CNode<E>;
            cursor->elem = c.cursor->elem;
            cursor->next = cursor;

            CNode<E>* p = c.cursor->next;
            CNode<E>* q = cursor;
            while(p != c.cursor)
            {
                CNode<E>* tmp = new CNode<E>;
                tmp->elem = p->elem;
                tmp->next = cursor;
                q->next = tmp;
                p = p->next;
                q = q->next;
            }
        }

        CLinkedList(const CLinkedList& c)
        {
            init(c);
        }

        CLinkedList& operator = (const CLinkedList& c)
        {
            if(this == &c)
            {
                return *this;
            }
            while(!empty())
            {
                remove();
            }
            init(c);
            return *this;
        }

        CLinkedList(CLinkedList&& c )
        {
            cursor = c.cursor;
            c.cursor = nullptr;
        }

        bool empty() const
        {
            return (nullptr == cursor);
        }

        const E& front() const// elem following cursor
        {
            if(empty())
            {
                throw std::logic_error("CLinkedList empty");
            }
            return cursor->next->elem;
        }

        const E& back() const// elem at cursor
        {
            if(empty())
            {
                throw std::logic_error("CLinkedList empty");
            }
            return cursor->elem;
        }

        void advance()// advance cursor
        {
            if(empty())
            {
                return;
            }
            cursor = cursor->next;
        }

        void add(const E& e)// add node after cursor
        {
            CNode<E> * v = new CNode<E>;
            v->elem = e;
            if(empty())
            {
                v->next = v;
                cursor = v;
            }
            else
            {
                v->next = cursor->next;
                cursor->next = v;
            }
        }

        void remove()// remove node after cursor
        {
            if(empty())
            {
                throw std::logic_error("CLinkedList empty");
            }

            CNode<E> * old = cursor->next;
            if(old == cursor){
                cursor = NULL;
            }
            else
            {
                cursor->next = old->next;
            }
            delete old;
        }

        void display()const
        {
            if(empty())
            {
                return;
            }
            CNode<E>* p = cursor->next;
            while(p != cursor)
            {
                std::cout << p->elem << "," ;
                p = p->next;
            }
            std::cout << p->elem ;
            std::cout << std::endl;
        }

    private:
        CNode<E>* cursor;
};

5. 栈

1)用数组实现栈

_Pragma("once")

#include <iostream>

enum {DEF_CAPACITY = 100};

template <typename E>
class ArrayStack
{
    public:
        ArrayStack(int cap = DEF_CAPACITY)
            :s(new E[cap]), capacity(cap), t(-1)
        {
        }
        
        ~ArrayStack()
        {
            if(s != nullptr)
            {
                delete []s;
            }
        }

        void init(const ArrayStack& a)
        {
            capacity = a.capacity;
            t = a.t;
            s = new E[capacity];
            for(int i = 0; i <= t; ++i)
            {
                s[i] = a.s[i];
            }
        }

        ArrayStack(const ArrayStack& a)
        {
            init(a);
        }

        ArrayStack& operator = (const ArrayStack& a)
        {
            if(this == &a)
            {
                return *this;
            }
            if(s != nullptr)
            {
                delete []s;
            }
            init(a);
            return *this;
        }

        ArrayStack(ArrayStack&& a)
        {
            capacity = a.capacity;
            t = a.t;
            s = a.s;
            a.s = nullptr;
        }

        int size()const
        {
            return (t + 1);
        }

        bool empty()const
        {
            return (t < 0);
        }

        const E& top() const
        {
            if(empty())
            {
                throw std::logic_error("ArrayStack empty");
            }
            return s[t];
        }

        void push(const E& e)
        {
            if(size() == capacity)
            {
                throw std::logic_error("ArrayStack full");
            }
            s[++t] = e;
        }

        void pop()
        {
            if(empty())
            {
                throw std::logic_error("ArrayStack empty");
            }
            --t;
        }

        void display()const
        {
            for(int i = 0; i <= t; ++i)
            {
                std::cout << s[i] << ",";
            }
            std::cout << std::endl;
        }

    private:
        E* s;
        int capacity;
        int t;
};

2)用单链表实现栈

// LinkedStack.hpp
_Pragma("once")

#include "SLinkedList.hpp"
#include <iostream>

template <typename E>
class LinkedStack
{
    public:
        LinkedStack()
            :s(), n(0)
        {
        }

        LinkedStack(const LinkedStack& ls)
            :s(ls.s), n(ls.n)
        {
        }

        LinkedStack& operator = (const LinkedStack& ls)
        {
            if(this == &ls)
            {
                return *this;
            }
            s = ls.s;
            n = ls.n;
            return *this;
        }
        
        // 若对象s没有移动构造函数,则调用其拷贝构造函数。
        LinkedStack(LinkedStack&& ls)
            :s(std::move(ls.s)), n(ls.n)
        {
        }

        int size()const
        {
            return n;
        }

        bool empty()const
        {
            return (0 == n);
        }

        const E& top() const
        {
            if(empty())
            {
                throw std::logic_error("LinkedStack empty");
            }
            return s.front();
        }

        void push(const E& e)
        {
            ++n;
            s.addFront(e);
        }

        void pop()
        {
            if(empty())
            {
                throw std::logic_error("LinkedStack empty");
            }
            --n;
            s.removeFront();
        }

        void display()const
        {
            s.display();
        }

    private:
        SLinkedList<E> s;
        int n;
};

6. 单向队列

1)循环数组实现的单向队列

// CArrayQueue.hpp
_Pragma("once")

#include <iostream>

enum {DEF_CAPACITY = 100};

template <typename E>
class CArrayQueue
{
    public:
        CArrayQueue(int cap = DEF_CAPACITY)
            :array(new E[cap + 1]), capacity(cap + 1), f(0), r(0)
        {
        }
        
        ~CArrayQueue()
        {
            if(array != nullptr)
            {
                delete []array;
            }
        }
        
        void init(const CArrayQueue& ca)
        {
            capacity = ca.capacity;
            f = ca.f;
            r = ca.r;
            array = new E[capacity];
            int tmp = f;
            while(tmp != r)
            {
                array[tmp] = ca.array[tmp];
                tmp = ((tmp + 1) % capacity);
            }
        }

        CArrayQueue(const CArrayQueue& ca)
        {
            init(ca);
        }

        CArrayQueue& operator = (const CArrayQueue& ca)
        {
            if(this == &ca)
            {
                return *this;
            }
            if(array != nullptr)
            {
                delete []array;
            }
            init(ca);
            return *this;
        }

        CArrayQueue(CArrayQueue&& ca)
        {
            capacity = ca.capacity;
            f = ca.f;
            r = ca.r;
            array = ca.array;
            ca.array = nullptr;
        }

        int size()const
        {
            if(nullptr == array)
            {
                return 0;
            }
            return ((r - f + capacity) % capacity);
        }

        bool empty()const
        {
            return (0 == size());
        }

        bool full()const
        {
            return (((r + 1) % capacity) == f);
        }

        const E& front() const
        {
            if(empty())
            {
                throw std::logic_error("CArrayQueue empty");
            }
            return array[f];
        }

        const E& back() const
        {
            if(empty())
            {
                throw std::logic_error("CArrayQueue empty");
            }
            return array[(r - 1  + capacity) % capacity];
        }

        void dequeue()
        {
            if(empty())
            {
                throw std::logic_error("CArrayQueue empty");
            }
            f = ((f + 1) % capacity);
        }

        void enqueue(const E& e)
        {
            if(full())
            {
                throw std::logic_error("CArrayQueue full");
            }
            array[r] = e;
            r = ((r + 1) % capacity);
        }

        void display()const
        {
            if(empty())
            {
                return;
            }
            int tmp = f;
            while(tmp != r)
            {
                std::cout << array[tmp] << ",";
                tmp = ((tmp + 1) % capacity);
            }
            std::cout << std::endl;
        }

private:
        E* array;// actual size of array  == capacity + 1, waste an elem space.
        int capacity;
        int f;// point to the front elem
        int r;// point to the elem following the last elem
};

2)用循环链表实现的单向队列

// CLinkedQueue.hpp
_Pragma("once")

#include "CLinkedList.hpp"
#include <iostream>


template <typename E>
class CLinkedQueue
{
    public:
        CLinkedQueue()
            :c(), n(0)
        {
        }

        CLinkedQueue(const CLinkedQueue& clq)
            :c(clq.c), n(clq.n)
        {
        }

        CLinkedQueue& operator = (const CLinkedQueue& clq)
        {
            if(this == &clq)
            {
                return *this;
            }
            c = clq.c;
            n = clq.n;
            return *this;
        }

        CLinkedQueue(CLinkedQueue&& clq)
            :c(std::move(clq.c)), n(clq.n)
        {
        }

        int size()const
        {
            return n;
        }

        bool empty()const
        {
            return (0 == n);
        }

        const E& front() const
        {
            if(empty())
            {
                throw std::logic_error("CLinkedQueue empty");
            }
            return c.front();
        }

        const E& back() const
        {
            if(empty())
            {
                throw std::logic_error("CLinkedQueue empty");
            }
            return c.back();
        }

        void dequeue()
        {
            if(empty())
            {
                throw std::logic_error("CLinkedQueue empty");
            }
            c.remove();
            --n;
        }

        void enqueue(const E& e)
        {
            c.add(e);
            c.advance();
            ++n;
        }

        void display()const
        {
            c.display();
        }

    private:
        CLinkedList<E> c;
        int n;
};

7. 双向队列(用双向链表实现)

// DLinkedQueue.hpp
_Pragma("once")

#include "DLinkedList.hpp"
#include <iostream>


template <typename E>
class DLinkedQueue
{
    public:
        DLinkedQueue()
            :d(), n(0)
        {
        }

        DLinkedQueue(const DLinkedQueue& dlq)
            :d(dlq.d), n(dlq.n)
        {
        }

        DLinkedQueue& operator = (const DLinkedQueue& dlq)
        {        
            if(this == &dlq)
            {
                return *this;
            }
            d = dlq.d;
            n = dlq.n;
            return *this;
        }

        DLinkedQueue(DLinkedQueue&& dlq)
            :d(std::move(dlq.d)), n(dlq.n)
        {
        }

        int size()const
        {
            return n;
        }

        bool empty()const
        {
            return (0 == n);
        }

        const E& front() const
        {
            if(empty())
            {
                throw std::logic_error("DLinkedQueue empty");
            }
            return d.front();
        }

        const E& back() const
        {
            if(empty())
            {
                throw std::logic_error("DLinkedQueue empty");
            }
            return d.back();
        }

        void insertFront(const E& e)
        {
            d.addFront(e);
            ++n;
        }

        void insertBack(const E& e)
        {
            d.addBack(e);
            ++n;
        }

        void removeFront()
        {
            if(empty())
            {
                throw std::logic_error("DLinkedQueue empty");
            }
            d.removeFront();
            --n;
        }

        void removeBack()
        {
            if(empty())
            {
                throw std::logic_error("DLinkedQueue empty");
            }
            d.removeBack();
            --n;
        }

        void display()const
        {
            d.display();
        }

    private:
        DLinkedList<E> d;
        int n;
};

8. 树

树是一种非线性数据结构,是数据组织方式的一种突破,在各个系统中都能见到树这种数据结构,包括文件系统、数据库等。生产力专家说,突破来自于“非线性”思维。

树的一些重要概念:

  • 树:若树不为空,则其有一个特殊节点,成为根节点,它没有父节点。除了根节点的其他节点只有唯一一个父节点。
  • 兄弟节点( siblings ):拥有相同父节点的节点称为兄弟节点。
  • 外部节点(叶子节点):没有子节点的节点称为外部节点或者叶子节点。
  • 内部节点:有一个或者多个子节点的节点称为内部节点。
  • 深度:若节点p是根节点,则其深度为0;否则,节点p的深度是其父亲节点的深度+1(深度可以理解成水的深度,水面(对应根节点)的深度为0,越往下深度越大)。树的层定义和其相同
  • 高度:若节点p是叶子节点,则其高度为0;否则,节点p的高度是其众多孩子节点中最大的高度+1(高度可以理解楼宇的高度,地面一层(对应叶子节点)的高度为0,越往上高度越大)。
  • 二叉树:每个结点最多有两个子树的树结构。通常子树被称作“左子树”(left subtree)和“右子树”(right subtree)。
  • 完全二叉树:如果每一层(最后一层除外)都被完全填充,并且最后一层上的所有叶子都尽可能地放在最左边(若没有完全填充的话),那么这个二叉树就是完全二叉树。
  • 二叉堆树:二叉堆树是一个完全二叉树,并且每个节点的值(或者说优先级,二叉堆树一般用于实现优先级队列)都大于等于其后代节点的值。(默认是大顶堆,反之则是小顶堆)
  • 二叉搜索树:若二叉树不为空,则:左子树的所有节点的值都小于根节点的值,右子树的所有节点的值都大于根节点的值,左子树和右子树自身都是二叉搜索树。
  • 自平衡AVL树:每个结点的左右子树的高度之差的绝对值(平衡因子)最多为1的二叉搜索树。
  • B-树:B-树是自平衡二叉搜索树的一种推广,其中每个节点可以拥有多个搜索键,并且有多个子节点。该结构可以实现更高效的自平衡,并且更加利于将节点数据保存在外部存储,例如硬盘中。
  • 前序遍历:先访问父节点,再访问子节点(按照既定的子节点顺序),按照这种方式递归遍历整个树。
  • 后序遍历:先访问子节点(按照既定的子节点顺序),再访问父节点,按照这种方式递归遍历整个树。可用于计算表达式。
  • 中序遍历:只对二叉树适用,先访问左子节点,接着访问父节点,最后访问右节点,按照这种方式递归遍历整个树。

普通二叉树的一些有用特性(n代表节点的个数,nE代表叶子节点的个数,nI代表非叶子节点的个数,h代表树的高度):

</svg>" width="662">

完全二叉树的特性:对于非空的完全二叉树,叶子节点的数目比非叶子节点的数目多1个。

</svg>" width="682">

完全二叉树采用数组存储方式的一些有用特性(下标从1开始,下标0不用,方便计算):

  • 第i层的节点对应的数组的下标是2^i,...,2^(i+1) - 1。
  • 数组下标i对应的节点所在的层为 向下取整。
  • 数组下标i的节点,若有孩子节点,则其对应的下标分别为2 * i, 2 * i + 1, 其父节点的下标是 i / 2 。

B-树(m阶)的特性:

  • 每个节点最多有m个子节点。
  • 每个非叶子节点(根节点除外)最少有m / 2 子节点。
  • 根节点,若不是叶子节点,则至少有2个子节点。
  • 拥有c个子节点的节点拥有c - 1 个搜索key,这些搜索key按序排列,并将它的子树分隔开来。
  • 所有叶子节点在同一层。
  • 拥有n个搜索key的m阶B-树,树的高度介于如下范围:
</svg>" width="334">

二叉堆树的实现(完全二叉树的特例,底层用数组实现):

// BinaryHeapTree.hpp
_Pragma("once")

#include <stdint.h>

class BinaryHeapTree
{
    public:
        BinaryHeapTree(int32_t capacity = 100);
        BinaryHeapTree(int32_t capacity, int32_t* array, int32_t n);
        BinaryHeapTree(const BinaryHeapTree& bht);
        BinaryHeapTree(BinaryHeapTree&& bht);
        BinaryHeapTree& operator = (const BinaryHeapTree& bht);
        ~BinaryHeapTree();

        bool empty() const;
        int32_t getRootValue() const;
        int32_t getLastLeafValue() const;
        bool insert(int32_t value);
        bool deleteRootValue();
        bool deleteValue(int32_t curIndex);
        void display() const;

    private:
        bool isRoot(int32_t curIndex) const;
        int32_t getLevelNum(int32_t curIndex) const;// 获取当前节点的层号,根节点的层号是0
        int32_t getParentIndex(int32_t curIndex) const;
        int32_t getLeftChildIndex(int32_t curIndex) const;
        int32_t getRightChildIndex(int32_t curIndex) const;

    private:
        void bubbleUp(int32_t curIndex);
        void bubbleDown(int32_t curIndex);

    private:
        void init(const BinaryHeapTree& bht);

    private:
        int32_t capacity;
        int32_t eleNum;
        int32_t * entries;// 下标0不用,从下标1开始
};

// BinaryHeapTree.cpp

#include "BinaryHeapTree.hpp"
#include <iostream>
#include<cmath>

BinaryHeapTree::BinaryHeapTree(int32_t capacity)
{
    std::cout << "BinaryHeapTree::BinaryHeapTree(int capacity)" << std::endl;
    this->capacity = capacity;
    eleNum = 0;
    entries = new int32_t[capacity + 1];// 下标0不用,浪费一个元素,方便计算
}

// O(n), n为节点个数。
BinaryHeapTree::BinaryHeapTree(int32_t capacity, int32_t* array, int32_t n)
{
    if(capacity < n)
    {
        throw std::logic_error("BinaryHeapTree capacity < n");
    }
    this->capacity = capacity;
    this->eleNum = n;
    this->entries = new int32_t[capacity + 1];// 下标0不用,浪费一个元素,方便计算
    memcpy(entries + 1, array, n * sizeof(int32_t));
    for(int32_t i = n / 2; i > 0; --i)
    {
        bubbleDown(i);
    }
}

void BinaryHeapTree::init(const BinaryHeapTree& bht)
{
    capacity = bht.capacity;
    eleNum = bht.eleNum;
    entries = new int32_t[capacity + 1];// 下标0不用,浪费一个元素,方便计算
    memcpy(entries, bht.entries, (capacity + 1) * sizeof(int32_t));
}

BinaryHeapTree::BinaryHeapTree(const BinaryHeapTree& bht)
{
    std::cout << "BinaryHeapTree::BinaryHeapTree(const BinaryHeapTree& bht)" << std::endl;
    init(bht);
}

BinaryHeapTree::BinaryHeapTree(BinaryHeapTree&& bht)
{
    std::cout << "BinaryHeapTree::BinaryHeapTree(BinaryHeapTree&& bht)" << std::endl;
    capacity = bht.capacity;
    eleNum = bht.eleNum;
    entries = bht.entries;
    bht.entries = nullptr;
}

BinaryHeapTree& BinaryHeapTree::operator = (const BinaryHeapTree& bht)
{
    std::cout << "BinaryHeapTree& BinaryHeapTree::operator = (const BinaryHeapTree& bht)" << std::endl;
    if(this == &bht)
    {
        return *this;
    }
    if(entries != nullptr)
    {
        delete []entries;
    }
    init(bht);
    return *this;
}

BinaryHeapTree::~BinaryHeapTree()
{
    if(entries != nullptr)
    {
        delete []entries;
    }
}

bool BinaryHeapTree::empty() const
{
    return (0 == eleNum);
}

int32_t BinaryHeapTree::getRootValue() const
{
    if(empty())
    {
        throw std::logic_error("BinaryHeapTree empty");
    }
    return entries[1];
}

int32_t BinaryHeapTree::getLastLeafValue() const
{
    if(empty())
    {
        throw std::logic_error("BinaryHeapTree empty");
    }
    return entries[eleNum];
}

bool BinaryHeapTree::insert(int32_t value)
{
    if(eleNum >= capacity)
    {
        throw std::logic_error("BinaryHeapTree full");
    }
    ++eleNum;
    entries[eleNum] = value;
    bubbleUp(eleNum);
    return true;
}

bool BinaryHeapTree::deleteRootValue()
{
    if(empty())
    {
        throw std::logic_error("BinaryHeapTree empty");
    }
    entries[1] = entries[eleNum];
    --eleNum;
    bubbleDown(1);
    return true;
}

bool BinaryHeapTree::deleteValue(int32_t curIndex)
{
    if(empty())
    {
        throw std::logic_error("BinaryHeapTree empty");
    }
    if(curIndex > eleNum)
    {
        throw std::logic_error("BinaryHeapTree curIndex invalid");
    }
    entries[curIndex] = entries[eleNum];
    --eleNum;
    bubbleUp(curIndex);
    bubbleDown(curIndex);
    return true;
}

bool BinaryHeapTree::isRoot(int32_t curIndex) const
{
    return (1 == curIndex);
}

int32_t BinaryHeapTree::getLevelNum(int32_t curIndex) const
{
    return std::log(curIndex) / std::log(2);
}

int32_t BinaryHeapTree::getParentIndex(int32_t curIndex) const
{
    return (curIndex / 2);
}

int32_t BinaryHeapTree::getLeftChildIndex(int32_t curIndex) const
{
    return (2 * curIndex);
}

int32_t BinaryHeapTree::getRightChildIndex(int32_t curIndex) const
{
    return (2 * curIndex + 1);
}

// 任何节点都只能有一个父节点(根节点除外),所以bubbleUp相对简单一些
void BinaryHeapTree::bubbleUp(int32_t curIndex)
{
    if(isRoot(curIndex))
    {
        return;
    }
    int32_t parentIndex = getParentIndex(curIndex);
    if(entries[curIndex] > entries[parentIndex])
    {
        std::swap(entries[curIndex], entries[parentIndex]);
        bubbleUp(parentIndex);
    }
}

// 节点可能没有子节点、只有左子节点、有左右两个子节点,不同的情况处理不同,bubbleDown比bubbleUp复杂一些。
void BinaryHeapTree::bubbleDown(int32_t curIndex)
{
    int32_t leftChildIndex = getLeftChildIndex(curIndex);
    int32_t rightChiledIndex = getRightChildIndex(curIndex);
    if(leftChildIndex > eleNum)
    {
        // 没有子节点,无需比较和交换
        return;
    }
    else if(eleNum < rightChiledIndex)
    {
        // 只有左子节点,没有右子节点,根据完全二叉树(二叉堆树)的定义,这个左子节点是不会有任何子节点的。
        if(entries[curIndex] < entries[leftChildIndex])
        {
            std::swap(entries[curIndex], entries[leftChildIndex]);
        }
    }
    else
    {
        // 有左、右两个子节点, 左右子节点是可能含有子节点的,所以需要继续比较和交互.
        if(entries[leftChildIndex] < entries[rightChiledIndex])
        {
            if(entries[curIndex] < entries[rightChiledIndex])
            {
                std::swap(entries[curIndex], entries[rightChiledIndex]);
                bubbleDown(rightChiledIndex);
            }
        }
        else
        {
            if(entries[curIndex] < entries[leftChildIndex])
            {
                std::swap(entries[curIndex], entries[leftChildIndex]);
                bubbleDown(leftChildIndex);
            }
        }
    }
}

void BinaryHeapTree::display() const
{
    for(int32_t i = 1 ; i <= eleNum; ++i)
    {
        std::cout << entries[i] << ",";
    }
    std::cout << std::endl;
}

9. 优先级队列

优先级队列可以用线性方式实现(例如,vector,list)但是性能不高(要么浪费在插入方面,要么浪费在查找方面,整体是O(n^2)),也可以采用非线性 方式(二叉堆树,插入和查找性能都在O(nlogn),代码见上面),性能能够得到提高。

二叉堆树根据堆顶元素是最小值还是最大值分为小顶堆和大顶堆,C++ STL采用大顶堆的方式。大顶堆需要实现Greater Than 比较器,小顶堆需要实现Less Than 比较器。

#include <iostream>

template <typename E>
struct GreaterThan
{
    public:
        bool operator ()(const E& x, const E&y) const
        {
            return x > y;
        }
};

template <typename E>
struct LessThan
{
    public:
        bool operator ()(const E& x, const E&y) const
        {
            return x < y;
        }
};

template <typename E>
struct GreaterThan2
{
    public:
        GreaterThan2(const E&x, const E&y)
            :_x(x), _y(y)
        {
        }

        bool operator ()() const
        {
            return _x > _y;
        }
    private:
        E _x;
        E _y;
};

int main() {
   std::cout << "GreaterThan<int>(10, 20)=" 
       << GreaterThan<int>()(10, 20) << std::endl;// 打印0
   std::cout << "GreaterThan<int>(20, 10)=" 
       << GreaterThan<int>()(20, 10) << std::endl;// 打印1
   std::cout << "GreaterThan<double>(21.45, 10.08)=" 
       << GreaterThan<double>()(21.45, 10.08) << std::endl;// 打印1

   std::cout << "LessThan<int>(10, 20)=" 
       << LessThan<int>()(10, 20) << std::endl;// 打印1
   std::cout << "LessThan<int>(20, 10)=" 
       << LessThan<int>()(20, 10) << std::endl;// 打印0
   std::cout << "LessThan<double>(21.45, 10.08)=" 
       << LessThan<double>()(21.45, 10.08) << std::endl;// 打印0

   std::cout << "GreaterThan2<int>(1, 2)()=" 
       << GreaterThan2<int>(1, 2)() << std::endl;// 打印0

   return 0;
}

10. Hash Table(Unordered Map)

1)Bucket Arrays

用Bucket Arrays 表示的Hash Table是大小为N的数组A,其中A的每个单元格都被认为是“bucket”(即键值对的集合),整数N定义数组的容量。如果键是在范围[0,N-1]内均匀分布的整数,那么就只需要这个bucket数组。带有键k的条目e被简单地插入bucket A[k]。

</svg>" width="2880">

2)Hash Function

Hash Function包括两部分,分别是Hash Code和Compression Function

</svg>" width="1108">

3) 解决冲突的方法

  • 分离链接法(Separate Chaining),Bucket Array的每个元素是一个小型的“Map”,存储冲突的Entry(K,V),“Map”可以用链表实现也可以用红黑树实现,通过阈值控制,超过则将链表转成红黑树,提高桶中元素的查询、删除、插入速度。
  • 开放寻址法(Open Addressing):线性探测;二次探测法;二次Hash法。这些开放寻址方案比分离链接法节省了一些空间,但它们不一定更快,当内存空间不是主要问题的时候,优先选择分离链接法。

4)负载因子和ReHashing

在上述所有哈希表方案中,负载因子λ = n / N应保持在1以下,一些实验数据表明,对于开放式寻址方案,λ须小于0.5,对于独立链式寻址方案,λ须小于0.9。当某个哈希表的负载因子超过阈值,需要重新分配更大的空间、并将原数据插入到新的空间,这个过程叫做Rehashing。即使定期Rehashing,哈希表也是实现无序映射的有效方法。如果每次Rehashing操作都将表的大小增加一倍,那么就可以将Rehashing表中所有元素的成本与最初插入它们的时间进行分摊。

5)采用分离链接法实现的一个HashMap

// 用法:
//    HashMap<int, int, PHashCode<int>> hashMap(hashCode<int>, 30);
//    hashMap.put(1, 4);
//    hashMap.put(10, 24);
//    hashMap.put(15, 54);
//    hashMap.put(20, 34);
//    hashMap.display();

_Pragma("once")
#include <list>
#include <vector>
#include <utility>
#include <iostream>

using namespace std;

template <typename K>
using PHashCode = int (*)(const K& k);

template <typename K>
int hashCode(const K& k)
{
    return (int)(k);
}

template <>
int hashCode(const long& k)
{
    return ((int)(k) + (k >> 32));
}

template<>
int hashCode(const string& k)
{
    const char* p = k.c_str();
    int len = k.size();
    int h = 0;
    for (int i = 0; i < len; i++) {
        h = (h << 5) | (h >> 27);
        h += (int) p[i];
    }
    return h;
}

template <>
int hashCode(const float& k)
{
    int len = sizeof(k);
    const char* p = reinterpret_cast<const char*>(&k);
    string s(p, len);
    return hashCode(s);
}

template <>
int hashCode(const double& k)
{
    int len = sizeof(k);
    const char* p = reinterpret_cast<const char*>(&k);
    string s(p, len);
    return hashCode(s);
}

template <typename K, typename V>
using Entry = pair<K, V>;

// 数组中的一个桶
template <typename K, typename V>
using Bucket = std::list<Entry<K, V>>;

// 由多个桶组成的数组
template <typename K, typename V>
using BktArray = std::vector<Bucket<K,V>>;

template <typename K, typename V>
using BItor = typename BktArray<K, V>::iterator;

template <typename K, typename V>
using EItor = typename Bucket<K, V>::iterator;

template <typename K, typename V, typename H>
class HashMap
{
    class Iterator;
    public:
        HashMap(H h = hashCode<K>, int capacity = 101)
            :_hash(h), _n(0), _ba(capacity)
        {
        }

        HashMap& operator = (const HashMap& hashMap)
        {
            if(this == &hashMap)
            {
                return *this;
            }
            _hash = hashMap._hash;
            _n = hashMap._n;
            _ba = hashMap._ba;
            return *this;
        }

        HashMap(const HashMap& hashMap)
            :_hash(hashMap._hash), _n(hashMap._n), _ba(hashMap._ba)
        {
        }

        HashMap(HashMap&& hashMap)
        {
            _hash = hashMap._hash;
            _n = hashMap._n;
            _ba = std::move(hashMap._ba);
            hashMap._n = 0;
        }

        int size() const
        {
            return _n;
        }

        bool empty() const
        {
            return 0 == size();
        }

        Iterator find(const K& k)
        {
            auto p = finder(k);
            if (endOfBk(p))
            {
                return end();
            }
            else
            {
                return p;
            }
        }

        Iterator put(const K& k, const V& v)
        {
            auto p = finder(k);
            if (endOfBk(p)) {
                return inserter(p, Entry<K, V>(k, v));
            }
            else
            {
                (*(p._ei)).second = v;
                return p;
            }
        }

        void erase(const K& k)
        {
            auto p = finder(k);
            if (endOfBk(p))
            {
                return;
            }
            eraser(p);
        }

        void erase(const Iterator& p)
        {
            eraser(p);
        }

        Iterator begin()
        {
            if (empty())
            {
                return end();
            }
            auto bi = _ba.begin();
            while((*bi).empty())
            {
                ++bi;
            }
            return Iterator(_ba, bi, (*bi).begin());
        }

        Iterator end()
        {
            return Iterator(_ba, _ba.end());
        }

        void display()
        {
            auto p = begin();
            auto q = end();
            while(p != q)
            {
                std::cout << "[" << (*p).first << "," << (*p).second <<"]"<< ";";
                ++p;
            }
            std::cout << std::endl;
        }

    protected:
        Iterator finder(const K& k)
        {
            int i = (_hash)(k) % _ba.size();
            auto bi = _ba.begin() + i;
            Iterator p(_ba, bi, (*bi).begin());
            while (!endOfBk(p) && (*p).first != k)
            {
                nextEntry(p);
            }
            return p;
        }

        Iterator inserter(const Iterator& p, const Entry<K, V>& e)
        {
            auto ins = (*(p._bi)).insert(p._ei, e);
            ++_n;
            return Iterator(_ba, p._bi, ins);
        }

        void eraser(const Iterator& p)
        {
            p._bi->erase(p._ei);
            --_n;
        }

        static void nextEntry(Iterator& p)
        {
            ++p._ei;
        }

        static bool endOfBk(const Iterator& p)
        {
            return p._ei == (*(p._bi)).end();
        }
    private:
        H _hash;//hash 函数
        int _n;
        BktArray<K, V> _ba;
    private:
        class Iterator {
            public:
                Iterator(const BktArray<K, V>& ba, const BItor<K, V>& bi, const EItor<K, V>& ei = EItor<K, V>())
                    : _pba(&ba), _bi(bi), _ei(ei)
                {
                }

                Entry<K, V>& operator*() const
                {
                    return *_ei;
                }

                bool operator != (const Iterator& p) const
                {
                    return !(*this == p);
                }

                bool operator==(const Iterator& p) const
                {
                    if (_pba != p._pba || _bi != p._bi)
                    {
                        return false;
                    }
                    else if (_bi == (*_pba).end())
                    {
                        return true;
                    }
                    else
                    {
                        return (_ei == p._ei);
                    }
                }
                //指向下一个entry,可能是本桶的,也可能是另外一个桶的第一个元素.
                Iterator& operator++()
                {
                    ++_ei;
                    if (endOfBk(*this)) {
                        ++_bi;
                        while (_bi != (*_pba).end() && (*_bi).empty())
                        {
                            ++_bi;
                        }
                        if (_bi == (*_pba).end())
                        {
                            return *this;
                        }
                        _ei = (*_bi).begin();
                    }
                    return *this;
                }


                friend HashMap<K, V, H>;
            private:
                const BktArray<K, V>* _pba;//最外围的数组
                BItor<K, V> _bi;//指向某个桶
                EItor<K, V> _ei;//指向某个entry
        };
};

文章首发于微信公众号【jameswhale的技术人生】,因精力有限,会不定期将文章同步到知乎平台。

</svg>" width="1122">