Union-Find,并查集基础
Union-Find,并查集基础
并查集是一个用于解决 disjoint sets 类问题的常用数据结构,用于处理集合中
元素的归属 find
集合的融合 union
Online algorithm, stream of input
计算 number of connected components
不支持 delete
我做并查集问题的时候,最喜欢的方式是直接无脑撸一个 weighted union find class 出来,然后在具体问题上直接调用接口。。
以下是 Princeton 的 Algorithm 算法课上的样例代码,这老头可真喜欢用 array 啊。。。他的并查集和KMP算法构建方式都稍微有点麻烦。
public class WeightedQuickUnionPathCompressionUF {
private int[] parent; // parent[i] = parent of i
private int[] size; // size[i] = number of sites in tree rooted at i
// Note: not necessarily correct if i is not a root node
private int count; // number of components
public WeightedQuickUnionPathCompressionUF(int N) {
count = N;
parent = new int[N];
size = new int[N];
for (int i = 0; i < N; i++) {
parent[i] = i;
size[i] = 1;
}
}
/**
* @return the number of components
*/
public int count() {
return count;
}
/**
* @return the component identifier for the component containing site
*/
public int find(int p) {
int root = p;
while (root != parent[root])
root = parent[root];
while (p != root) {
int newp = parent[p];
parent[p] = root;
p = newp;
}
return root;
}
/**
* @return true if the two sites p and qare in the same component;
* false otherwise
*/
public boolean connected(int p, int q) {
return find(p) == find(q);
}
/**
* Merges the component containing site p with the
* the component containing site q.
*
* @param p the integer representing one site
* @param q the integer representing the other site
*/
public void union(int p, int q) {
int rootP = find(p);
int rootQ = find(q);
if (rootP == rootQ) return;
// make smaller root point to larger one
if (size[rootP] < size[rootQ]) {
parent[rootP] = rootQ;
size[rootQ] += size[rootP];
}
else {
parent[rootQ] = rootP;
size[rootP] += size[rootQ];
}
count--;
}
实际操作中,用 HashMap 做 parent 和 size 比起数组有着不可比拟的优越性。。比如下面这题
其实我们建的是对于每一个元素的 1-1 mapping,或者说是一个 元素之间的 graph,表示 join 关系。
中间有一个 [2147483646,-2147483647,0,2,2147483644,-2147483645,2147483645] 的 test case 始终出 bug,把 find 函数的返回 type 从 int 改到 Integer 就好了。
看来以后不能总是假设 int 和 Integer 是完全相等的,尤其是这种在 hashmap 里以 Integer 为 Key 的情况,要尽可能的保持类型正确。
public class Solution {
private class WeightedUnionFind{
private HashMap<Integer, Integer> parent;
private HashMap<Integer, Integer> size;
private int maxSize;
public WeightedUnionFind(){}
public WeightedUnionFind(int[] nums){
int N = nums.length;
parent = new HashMap<Integer, Integer>();
size = new HashMap<Integer, Integer>();
maxSize = 1;
for(int i = 0; i < N; i++){
parent.put(nums[i], nums[i]);
size.put(nums[i], 1);
}
}
private int getMaxSize(){
return this.maxSize;
}
// With path compression
public Integer find(Integer num){
if(!parent.containsKey(num)) return null;
Integer root = num;
while(root != parent.get(root)){
root = parent.get(root);
}
while(num != root){
Integer next = parent.get(num);
parent.put(num, root);
num = next;
}
return root;
}
public void union(int p, int q){
Integer pRoot = find(p);
Integer qRoot = find(q);
if(pRoot == null || qRoot == null) return;
if(pRoot == qRoot) return;
int pSize = size.get(pRoot);
int qSize = size.get(qRoot);
if(pSize > qSize){
parent.put(qRoot, pRoot);
size.put(pRoot, pSize + qSize);
maxSize = Math.max(maxSize, pSize + qSize);
} else {
parent.put(pRoot, qRoot);
size.put(qRoot, pSize + qSize);
maxSize = Math.max(maxSize, pSize + qSize);
}
}
}
public int longestConsecutive(int[] nums) {
if(nums == null || nums.length == 0) return 0;
WeightedUnionFind uf = new WeightedUnionFind(nums);
for(int num : nums){
if(num != Integer.MIN_VALUE) uf.union(num, num - 1);
if(num != Integer.MAX_VALUE) uf.union(num, num + 1);
}
return uf.maxSize;
}
}
仅仅对于这题而言,还有其他的做法,比如每次看到新元素都往两边扫。不过不是这章内容的主题。
另一种简单易懂的做法是用 HashMap 动态维护 “区间”,思路简洁易懂,面试遇到这题的话推荐还是用 HashMap,别用并查集。
public int longestConsecutive(int[] nums) {
int maxLen = 0;
if(nums == null) return maxLen;
Set<Integer> unvisited = getSet(nums);
for (int n : nums){
if(unvisited.isEmpty())break;
if(!unvisited.remove(n))continue;
int len = 1;
int leftOffset = -1;
int rightOffset = 1;
while(unvisited.remove(n+leftOffset--))len++;
while(unvisited.remove(n+rightOffset++))len++;
maxLen = Math.max(maxLen,len);
}
return maxLen;
}
private Set<Integer> getSet( int [] array){
Set<Integer> set = new HashSet<>();
for(int n : array) set.add(n);
return set;
}
常犯错误:二维转一维 index 的时候总乘错,搞混。正确的是 x * cols + y,以后自己想的时候还是用 rows / cols 吧
在这题里降维成一维 index 是可以的,不过要注意边界处理,否则某一行的最后一个元素会连通到下一行的第一个元素上去。
public class Solution {
private class WeightedUnionFind{
HashMap<Integer, Integer> parent;
HashMap<Integer, Integer> size;
int count;
public WeightedUnionFind(){
parent = new HashMap<Integer, Integer>();
size = new HashMap<Integer, Integer>();
count = 0;
}
public Integer find(Integer index){
if(!parent.containsKey(index)) return null;
Integer root = index;
while(root != parent.get(root)){
root = parent.get(root);
}
while(index != root){
Integer next = parent.get(index);
parent.put(index, root);
index = next;
}
return root;
}
public void union(Integer a, Integer b){
Integer aRoot = find(a);
Integer bRoot = find(b);
if(aRoot == null || bRoot == null) return;
if(aRoot.equals(bRoot)) return;
int aSize = size.get(aRoot);
int bSize = size.get(bRoot);
if(aSize > bSize){
parent.put(bRoot, aRoot);
size.put(aRoot, aSize + bSize);
} else {
parent.put(aRoot, bRoot);
size.put(bRoot, aSize + bSize);
}
count --;
}
public void add(Integer index){
if(!parent.containsKey(index)){
parent.put(index, index);
size.put(index, 1);
count ++;
}
}
public int getCount(){
return this.count;
}
}
public List<Integer> numIslands2(int m, int n, int[][] positions) {
List<Integer> list = new ArrayList<Integer>();
if(positions == null || positions.length == 0) return list;
WeightedUnionFind uf = new WeightedUnionFind();
for(int i = 0; i < positions.length; i++){
int x = positions[i][0];
int y = positions[i][1];
int index = x * n + y;
uf.add(index);
if(x + 1 <= m - 1) uf.union(index, (x + 1) * n + y);
if(x > 0) uf.union(index, (x - 1) * n + y);
if(y + 1 <= n - 1) uf.union(index, x * n + y + 1);
if(y > 0) uf.union(index, x * n + y - 1);
list.add(uf.getCount());
}
return list;
}
}
这题如果是把所有的 nodes 给你,其实很好做,每个点做 dfs 就好了,用 hashset 避免重复访问,毕竟是 undirected graph.
然而这题比较 gay 的地方在于。。。数据是以一个个 edge 的方式给你的,强行让你以一个 union-find 的方式一个一个节点添加,那么显然读取所有 edges 建图再去 dfs 就是很不现实的做法,而且也失去了 online algorithm 的优势。
对于 Integer type,要用 a.equals(b),不要用 ==
代码轻松愉快~
public class Solution {
private class WeightedUnionFind{
HashMap<Integer, Integer> parent;
HashMap<Integer, Integer> size;
int count;
public WeightedUnionFind(){}
public WeightedUnionFind(int n){
parent = new HashMap<Integer, Integer>();
size = new HashMap<Integer, Integer>();
count = n;
for(int i = 0; i < n; i++){
parent.put(i, i);
size.put(i, 1);
}
}
public Integer find(Integer node){
if(!parent.containsKey(node)) return null;
Integer root = node;
while(root != parent.get(root)){
root = parent.get(root);
}
while(node != root){
Integer next = parent.get(node);
parent.put(node, root);
node = next;
}
return root;
}
public void union(Integer p, Integer q){
Integer pRoot = find(p);
Integer qRoot = find(q);
if(pRoot == null || qRoot == null) return;
if(pRoot.equals(qRoot)) return;
int pSize = size.get(pRoot);
int qSize = size.get(qRoot);
if(pSize > qSize){
parent.put(qRoot, pRoot);
size.put(pRoot, pSize + qSize);
} else {
parent.put(pRoot, qRoot);
size.put(qRoot, pSize + qSize);
}
count --;
}
public int getCount(){
return this.count;
}
}
public int countComponents(int n, int[][] edges) {
if(edges == null || edges.length == 0) return n;
WeightedUnionFind uf = new WeightedUnionFind(n);
for(int i = 0; i < edges.length; i++){
int node1 = edges[i][0];
int node2 = edges[i][1];
uf.union(node1, node2);
}
return uf.getCount();
}
}
Last updated