Union-Find Algorithm Explained

Union-Find (Disjoint Set Union) Algorithm

Algorithm Overview

The Union-Find or Disjoint Set Union (DSU) is a data structure that keeps track of elements partitioned into disjoint (non-overlapping) sets. It supports two primary operations:

  • Find(x): Determines the representative (or root) of the set that x belongs to.
  • Union(x, y): Merges the sets containing x and y.

Optimizations

  • Path Compression: Flattens the structure of the tree whenever Find is called, by making each node point directly to the root.
  • Union by Rank / Size: Always attach the smaller tree under the larger one to keep the tree shallow.

Time and Space Complexity

  • Time Complexity: O(α(n)) per operation, where α is the inverse Ackermann function, which grows very slowly. Effectively O(1) for practical inputs.
  • Space Complexity: O(n), where n is the number of elements.

Union-Find C++ Implementation

class UnionFind {
  vector parent, rank;
public:
  UnionFind(int n) {
    parent.resize(n);
    rank.resize(n, 0);
    for (int i = 0; i < n; ++i) parent[i] = i;
  }

  int find(int x) {
    if (parent[x] != x) parent[x] = find(parent[x]);
    return parent[x];
  }

  void unite(int x, int y) {
    int rx = find(x), ry = find(y);
    if (rx == ry) return;
    if (rank[rx] < rank[ry]) parent[rx] = ry;
    else if (rank[rx] > rank[ry]) parent[ry] = rx;
    else { parent[ry] = rx; rank[rx]++; }
  }
};

Easy Problems

1. Check if Graph is Connected

// Given n nodes and list of edges, check if the graph is fully connected.
bool isConnected(int n, vector>& edges) {
  UnionFind uf(n);
  for (auto& e : edges) uf.unite(e.first, e.second);
  int root = uf.find(0);
  for (int i = 1; i < n; ++i)
    if (uf.find(i) != root) return false;
  return true;
}

2. Find Number of Connected Components

int countComponents(int n, vector>& edges) {
  UnionFind uf(n);
  for (auto& e : edges) uf.unite(e.first, e.second);
  unordered_set roots;
  for (int i = 0; i < n; ++i)
    roots.insert(uf.find(i));
  return roots.size();
}

Intermediate Problems

1. Redundant Connection

vector findRedundantConnection(vector>& edges) {
  UnionFind uf(1001);
  for (auto& e : edges) {
    if (uf.find(e[0]) == uf.find(e[1])) return {e[0], e[1]};
    uf.unite(e[0], e[1]);
  }
  return {};
}

2. Accounts Merge

vector> accountsMerge(vector>& accounts) {
  UnionFind uf(accounts.size());
  unordered_map emailToId;
  for (int i = 0; i < accounts.size(); ++i) {
    for (int j = 1; j < accounts[i].size(); ++j) {
      string email = accounts[i][j];
      if (emailToId.count(email)) uf.unite(i, emailToId[email]);
      else emailToId[email] = i;
    }
  }
  unordered_map> merged;
  for (auto& [email, id] : emailToId) {
    merged[uf.find(id)].insert(email);
  }
  vector> res;
  for (auto& [id, emails] : merged) {
    vector acc = {accounts[id][0]};
    acc.insert(acc.end(), emails.begin(), emails.end());
    res.push_back(acc);
  }
  return res;
}

Hard Problem

1. Number of Islands II

vector numIslands2(int m, int n, vector>& positions) {
  UnionFind uf(m * n);
  vector res;
  vector> grid(m, vector(n, false));
  int count = 0;
  vector> dirs = {{0,1},{1,0},{-1,0},{0,-1}};

  for (auto& pos : positions) {
    int x = pos.first, y = pos.second, idx = x * n + y;
    if (grid[x][y]) {
      res.push_back(count);
      continue;
    }
    grid[x][y] = true;
    ++count;
    for (auto& d : dirs) {
      int nx = x + d.first, ny = y + d.second, nidx = nx * n + ny;
      if (nx >= 0 && ny >= 0 && nx < m && ny < n && grid[nx][ny]) {
        if (uf.find(idx) != uf.find(nidx)) {
          uf.unite(idx, nidx);
          --count;
        }
      }
    }
    res.push_back(count);
  }
  return res;
}