Data Structures for Disjoint Sets

Disjoint Sets

基本操作

  1. make_set(x) 将一个vertex变成一个disjoint

  2. union(x, y) 将包含vertex x 的 set 和 包含vertex y 的 set 并起来

  3. find_set(x) 返回一个指针,指向包含这个vertex 的唯一的 set

Basic implementation

最基本的应用是来确定一个 undirected graph 中的 connected components.

连接components:

connected_component(G)
    for each vertex v in G
        make_set(v)
    for each edge(u, v) e in G
        if (find_set(u) != find_set(v))
            union(u, v)

判断两个vertices是否连接在同一component中:

same_component(u, v)
    if find_set(u) == find_set(v)
        return true
    else
        return false

Example

Number of Connected Components in an Undirected Graph

class Solution {
public:
    //compress the path and return the root of the union set
    int find_set(vector<int> & allsets, int node) {
        if (node != allsets[node]) {
            allsets[node] = find_set(allsets, allsets[node]);
        }
        return allsets[node];
    }
    
    void union_set(vector<int> & allsets, int x, int y) {
        int xroot = find_set(allsets, x);
        int yroot = find_set(allsets, y);
        allsets[yroot] = xroot;
    }
    int countComponents(int n, vector<pair<int, int>>& edges) {
        //allsets[x] : get the parent of set include x
        vector<int> allsets(n);
        int count = 0;
        for (int i = 0; i < n; i++) {
            allsets[i] = i;
        }
        for (auto edge : edges) {
            int x = find_set(allsets, edge.first);
            int y = find_set(allsets, edge.second);
            if (x != y) {
                union_set(allsets, edge.first, edge.second);
                count++;
            }
        }
        return n - count;
    }
};

Optimization - Disjoint-Set Forest

除了可以用链表/数组来实现 disjoint-set, disjoint-set forest是一种比链表实现更快的实现。我们将sets表示为rooted trees。 每一个 node 包括一个 vertex,每一棵树代表一个set. 所有的node都指向各自的parent. 在树中那个指向自己的显然就是tree root。这种数据结构高效的原因是用了以下两种技巧:”union by rank” and “path compression”.

Union By Rank

为每个node维护一个变量rank。 rank代表这这个node的高度的上限。在并集的过程中,我们依据rank的高低,把低rank的root指向高rank的root。

Path compression

路径压缩并不改变任何rank,只是把tree中的所有node都指向这颗树的root。这个过程在find_set这个操作中实现。

实现

make_set(x)
x.p = x
x.rank = 0
union(x, y)
    link(find_set(x), find_set(y))
link(x,y)
    if x.rank > y.rank
        y.p = x
    else if x.rank < y.rank
        x.p = y
    else
        x.p = y
        y.rank = y.rank + 1
find_set(x)
    if x != x.p
        x.p = find_set(x.p)
    return x.p

时间复杂度

时间复杂度为 $O(m \alpha(n))$, union_set takes amortized n time complexity.

空间复杂度为 O(n)

TODO: 分析

Detect Cycle in a undirected graph

class Edge {
public:
    int src;
    int dest;
    Edge() {;}
    Edge(int s, int d);
};

Edge::Edge(int s, int d) {
    src = s;
    dest = d;
}

class Graph {
public:
    int V;
    int E;
    vector<Edge> edges;
    Graph(int V, int E);
    void addEdge(int start, int end);
};

Graph::Graph(int V, int E) {
    this->V = V;
    this->E = E;
    this->edges.resize(E);
}

void Graph::addEdge(int start, int end) {
    Edge e(start, end);
    edges.emplace_back(e);
}


class DisjointSet {
public:
    int parent;
    int rank;
    DisjointSet(int parent);
};

DisjointSet::DisjointSet(int p) {
    this->parent = p;
    this->rank = 0;
}

//given a node, find it's parent in its disjoint_set
int find_set(vector<DisjointSet> & allsets, int node) {
    if (allsets[node].parent != node) {
        allsets[node].parent = find_set(allsets, allsets[node].parent);
    }
    return allsets[node].parent;
}

void union_set(vector<DisjointSet> & allsets, int x, int y) {
    int xp = find_set(allsets, x);
    int yp = find_set(allsets, y);
    if (allsets[xp].rank > allsets[yp].rank) {
        allsets[yp].parent = xp;
    }
    else if (allsets[xp].rank < allsets[yp].rank) {
        allsets[xp].parent = yp;
    }
    else {
        allsets[xp].parent = yp;
        allsets[yp].rank++;
    }
}

bool is_cycle(const Graph & graph) {
    int V = graph.V;
    int E = graph.E;
    vector<DisjointSet> allsets;
    for (int v = 0; v < V; v++) {
        allsets.emplace_back(v);
    }
    for (int e = 0; e < E; e++) {
        int x = find_set(allsets, graph.edges[e].src);
        int y = find_set(allsets, graph.edges[e].dest);
        if (x == y) {
            return true;
        }
        else {
            union_set(allsets, x, y);
        }
    }
    return false;
}

void test_is_cycle() {
    Graph graph(3, 3);
    graph.addEdge(0, 1);
    graph.addEdge(1, 2);
    graph.addEdge(0, 2);
    if (is_cycle(graph)) {
    	cout <<  "has a cycle";
    }
    else {
        cout << "does not have a cycle";
    }    
}

LeetCode 305 Number of Islands II