Disjoint Sets
基本操作
make_set(x) 将一个vertex变成一个disjoint
union(x, y) 将包含vertex x 的 set 和 包含vertex y 的 set 并起来
find_set(x) 返回一个指针,指向包含这个vertex 的唯一的 set
Basic implementation
最基本的应用是来确定一个 undirected graph 中的 connected components.
连接components:
connected_component(G)
for each vertex v in G
make_set(v)
for each edge(u, v) e in G
if (find_set(u) != find_set(v))
union(u, v)
判断两个vertices是否连接在同一component中:
same_component(u, v)
if find_set(u) == find_set(v)
return true
else
return false
Example
Number of Connected Components in an Undirected Graph
class Solution {
public:
//compress the path and return the root of the union set
int find_set(vector<int> & allsets, int node) {
if (node != allsets[node]) {
allsets[node] = find_set(allsets, allsets[node]);
}
return allsets[node];
}
void union_set(vector<int> & allsets, int x, int y) {
int xroot = find_set(allsets, x);
int yroot = find_set(allsets, y);
allsets[yroot] = xroot;
}
int countComponents(int n, vector<pair<int, int>>& edges) {
//allsets[x] : get the parent of set include x
vector<int> allsets(n);
int count = 0;
for (int i = 0; i < n; i++) {
allsets[i] = i;
}
for (auto edge : edges) {
int x = find_set(allsets, edge.first);
int y = find_set(allsets, edge.second);
if (x != y) {
union_set(allsets, edge.first, edge.second);
count++;
}
}
return n - count;
}
};
Optimization - Disjoint-Set Forest
除了可以用链表/数组来实现 disjoint-set, disjoint-set forest是一种比链表实现更快的实现。我们将sets表示为rooted trees。 每一个 node 包括一个 vertex,每一棵树代表一个set. 所有的node都指向各自的parent. 在树中那个指向自己的显然就是tree root。这种数据结构高效的原因是用了以下两种技巧:”union by rank” and “path compression”.
Union By Rank
为每个node维护一个变量rank。 rank代表这这个node的高度的上限。在并集的过程中,我们依据rank的高低,把低rank的root指向高rank的root。
Path compression
路径压缩并不改变任何rank,只是把tree中的所有node都指向这颗树的root。这个过程在find_set这个操作中实现。
实现
make_set(x)
x.p = x
x.rank = 0
union(x, y)
link(find_set(x), find_set(y))
link(x,y)
if x.rank > y.rank
y.p = x
else if x.rank < y.rank
x.p = y
else
x.p = y
y.rank = y.rank + 1
find_set(x)
if x != x.p
x.p = find_set(x.p)
return x.p
时间复杂度
时间复杂度为 $O(m \alpha(n))$, union_set takes amortized n time complexity.
空间复杂度为 O(n)
TODO: 分析
Detect Cycle in a undirected graph
class Edge {
public:
int src;
int dest;
Edge() {;}
Edge(int s, int d);
};
Edge::Edge(int s, int d) {
src = s;
dest = d;
}
class Graph {
public:
int V;
int E;
vector<Edge> edges;
Graph(int V, int E);
void addEdge(int start, int end);
};
Graph::Graph(int V, int E) {
this->V = V;
this->E = E;
this->edges.resize(E);
}
void Graph::addEdge(int start, int end) {
Edge e(start, end);
edges.emplace_back(e);
}
class DisjointSet {
public:
int parent;
int rank;
DisjointSet(int parent);
};
DisjointSet::DisjointSet(int p) {
this->parent = p;
this->rank = 0;
}
//given a node, find it's parent in its disjoint_set
int find_set(vector<DisjointSet> & allsets, int node) {
if (allsets[node].parent != node) {
allsets[node].parent = find_set(allsets, allsets[node].parent);
}
return allsets[node].parent;
}
void union_set(vector<DisjointSet> & allsets, int x, int y) {
int xp = find_set(allsets, x);
int yp = find_set(allsets, y);
if (allsets[xp].rank > allsets[yp].rank) {
allsets[yp].parent = xp;
}
else if (allsets[xp].rank < allsets[yp].rank) {
allsets[xp].parent = yp;
}
else {
allsets[xp].parent = yp;
allsets[yp].rank++;
}
}
bool is_cycle(const Graph & graph) {
int V = graph.V;
int E = graph.E;
vector<DisjointSet> allsets;
for (int v = 0; v < V; v++) {
allsets.emplace_back(v);
}
for (int e = 0; e < E; e++) {
int x = find_set(allsets, graph.edges[e].src);
int y = find_set(allsets, graph.edges[e].dest);
if (x == y) {
return true;
}
else {
union_set(allsets, x, y);
}
}
return false;
}
void test_is_cycle() {
Graph graph(3, 3);
graph.addEdge(0, 1);
graph.addEdge(1, 2);
graph.addEdge(0, 2);
if (is_cycle(graph)) {
cout << "has a cycle";
}
else {
cout << "does not have a cycle";
}
}