Bucket Sort Algorithm

Overview

Bucket Sort is a distribution-based sorting algorithm that works by partitioning an array into a finite number of buckets. Each bucket holds a subrange of the input values; elements are distributed into these buckets, each bucket is sorted (often with another sorting algorithm, e.g., insertion sort), and then the buckets are concatenated.

Time Complexity

  • Average: O(n + k), where n is number of elements and k is number of buckets.
  • Best: O(n + k) (when distribution is uniform and buckets are small).
  • Worst: O(n²) (if all elements fall into one bucket and that bucket is sorted by insertion sort).

Memory Complexity

  • O(n + k) additional space for buckets.

Easy Problems

1. Sort an Array of Floating-Point Numbers in [0,1)

Given an array of floats in [0,1), distribute into n buckets, sort each bucket by insertion sort, then concatenate.


#include <iostream>
#include <vector>
#include <algorithm>

void bucketSort(std::vector<float>& arr) {
    int n = arr.size();
    std::vector<std::vector<float>> buckets(n);
    for (float x : arr) {
        int idx = n * x;            // bucket index in [0..n-1]
        buckets[idx].push_back(x);
    }
    for (auto &b : buckets)
        std::sort(b.begin(), b.end());  // insertion sort also works well here
    int pos = 0;
    for (auto &b : buckets)
        for (float x : b)
            arr[pos++] = x;
}

int main() {
    std::vector<float> data = {0.42, 0.32, 0.23, 0.52, 0.25, 0.47};
    bucketSort(data);
    for (float x : data) std::cout << x << " ";
    return 0;
}
  

2. Sort Integers Within a Known Small Range [0..k]

If all integers lie in [0..k] with k = O(n), you can use k+1 buckets (one per integer), place each number in its bucket, then read out.


#include <iostream>
#include <vector>

void bucketSortRange(std::vector<int>& arr, int k) {
    std::vector<int> buckets(k+1, 0);
    for (int x : arr)
        ++buckets[x];
    int idx = 0;
    for (int i = 0; i <= k; ++i)
        while (buckets[i]-- > 0)
            arr[idx++] = i;
}

int main() {
    std::vector<int> data = {3, 6, 4, 1, 3, 4, 1, 4};
    bucketSortRange(data, 6);
    for (int x : data) std::cout << x << " ";
    return 0;
}
  

Intermediate Problems

1. Maximum Gap (LeetCode 164)

Given n numbers, find the maximum difference between successive elements in sorted order. Using bucket sort in O(n) time: place numbers into buckets by interval ≈⌊(max–min)/ (n–1)⌋, track only min/max in each bucket, then scan buckets to compute gaps.


#include <iostream>
#include <vector>
#include <limits>
#include <algorithm>

int maximumGap(const std::vector<int>& nums) {
    int n = nums.size();
    if (n < 2) return 0;
    auto [min_it, max_it] = std::minmax_element(nums.begin(), nums.end());
    int mn = *min_it, mx = *max_it;
    int bucketSize = std::max(1, (mx - mn) / (n - 1));
    int bucketCount = (mx - mn) / bucketSize + 1;
    struct B { int mn = INT_MAX, mx = INT_MIN; bool used = false; };
    std::vector<B> buckets(bucketCount);
    for (int x : nums) {
        int bi = (x - mn) / bucketSize;
        buckets[bi].used = true;
        buckets[bi].mn = std::min(buckets[bi].mn, x);
        buckets[bi].mx = std::max(buckets[bi].mx, x);
    }
    int prevMax = mn, maxGap = 0;
    for (auto &b : buckets) {
        if (!b.used) continue;
        maxGap = std::max(maxGap, b.mn - prevMax);
        prevMax = b.mx;
    }
    return maxGap;
}

int main() {
    std::vector<int> data = {3,6,9,1};
    std::cout << maximumGap(data);
    return 0;
}
  

2. Contains Duplicate III (LeetCode 220)

Given array and two integers k and t, find if there exist indices i,j such that |nums[i]–nums[j]| ≤ t and |i–j| ≤ k. Use buckets of width t+1 to map each number to a bucket and check neighboring buckets in O(n) time.


#include <iostream>
#include <vector>
#include <unordered_map>
#include <cmath>

bool containsNearbyAlmostDuplicate(
    const std::vector<long long>& nums, long long k, long long t) {
    if (t < 0) return false;
    std::unordered_map<long long,long long> buckets;
    long long w = t + 1;
    for (int i = 0; i < nums.size(); ++i) {
        long long x = nums[i];
        long long bi = x >= 0 ? x / w : (x+1) / w - 1;
        if (buckets.count(bi)) return true;
        if (buckets.count(bi-1) && llabs(x - buckets[bi-1]) <= t) return true;
        if (buckets.count(bi+1) && llabs(x - buckets[bi+1]) <= t) return true;
        buckets[bi] = x;
        if (i >= k) {
            long long y = nums[i - k];
            long long oldBi = y >= 0 ? y / w : (y+1) / w - 1;
            buckets.erase(oldBi);
        }
    }
    return false;
}

int main() {
    std::vector<long long> data = {1,2,3,1};
    std::cout << std::boolalpha
              << containsNearbyAlmostDuplicate(data, 3, 0);
    return 0;
}
  

Hard Problem

Sort Fixed-Length Strings with Radix Sort

Given an array of m strings, each of length L, sort them lexicographically in O(m·L) time using LSD Radix Sort. At each character position (from last to first), bucket by character (e.g., 256 ASCII buckets), then concatenate.


#include <iostream>
#include <vector>
#include <string>

void radixSortStrings(std::vector<std::string>& arr, int L) {
    int m = arr.size();
    const int R = 256;  // extended ASCII
    std::vector<std::string> aux(m);
    for (int d = L - 1; d >= 0; --d) {
        std::vector<int> count(R + 1, 0);
        for (auto &s : arr)
            count[(unsigned char)s[d] + 1]++;
        for (int r = 0; r < R; ++r)
            count[r+1] += count[r];
        for (auto &s : arr) {
            int c = (unsigned char)s[d];
            aux[count[c]++] = s;
        }
        arr = aux;
    }
}

int main() {
    std::vector<std::string> data = {
        "dab", "add", "cab", "fad", "fee", "bad", "dad", "bee", "fed", "bed"
    };
    radixSortStrings(data, 3);
    for (auto &s : data) std::cout << s << " ";
    return 0;
}