Approximate Median in Sub-linear Time
MediumUpdated: Aug 10, 2025
Problem
Create an algorithm to efficiently compute the approximate median of a list of numbers.
More precisely, given an unordered list of N numbers, find an element whose rank is between N / 4 and 3 * N / 4, with a high level of certainty, in less than O(N) time.
Examples
Example 1
Input: [3, 1, 9, 4, 7, 2, 8, 6, 5]
Output: 5 (or any element with rank between 2 and 6)
Explanation: N=9, so rank should be between 9/4=2.25 and 3*9/4=6.75, i.e., between 2 and 6. Elements 3,4,5,6,7 all satisfy this condition.
Example 2
Input: [10, 20, 30, 40]
Output: 20 or 30
Explanation: N=4, so rank should be between 4/4=1 and 3*4/4=3, i.e., between 1 and 3. Elements at rank 1,2,3 are 10,20,30 respectively.
Example 3
Input: [1, 100, 2, 99, 3, 98, 4, 97]
Output: Any element with rank between 2 and 6
Explanation: N=8, rank range is [2, 6]. After sorting: [1,2,3,4,97,98,99,100]. Valid elements are 2,3,4,97,98.
Example 4
Input: [5, 5, 5, 5, 5]
Output: 5
Explanation: All elements are the same, so any element satisfies the rank constraint.
Example 5
Input: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Output: Any element with rank between 3 and 9
Explanation: N=12, rank range is [3, 9]. Valid elements are 3,4,5,6,7,8,9.
Solution
Method 1 - Randomized Sampling with Confidence Intervals
Intuition
Instead of examining all N elements, we can sample a smaller subset and use statistical properties to estimate the median. By taking random samples and using order statistics, we can find an approximate median with high probability in sub-linear time.
Approach
- Take a random sample of size √N from the input array
- Sort the sample to find potential median candidates
- For each candidate, estimate its rank in the original array using additional sampling
- Return a candidate whose estimated rank falls within [N/4, 3N/4]
- Use confidence intervals to ensure high probability of success
Code
C++
class Solution {
private:
mt19937 rng;
public:
Solution() : rng(random_device{}()) {}
int approximateMedian(vector<int>& nums) {
int n = nums.size();
if (n <= 4) {
sort(nums.begin(), nums.end());
return nums[n/2];
}
int sampleSize = max(1, (int)sqrt(n));
vector<int> sample = randomSample(nums, sampleSize);
sort(sample.begin(), sample.end());
// Try candidates around the median of the sample
int start = max(0, sampleSize/2 - 1);
int end = min(sampleSize-1, sampleSize/2 + 1);
for (int i = start; i <= end; i++) {
int candidate = sample[i];
int estimatedRank = estimateRank(nums, candidate);
if (estimatedRank >= n/4 && estimatedRank <= 3*n/4) {
return candidate;
}
}
// Fallback: return middle element of sample
return sample[sampleSize/2];
}
private:
vector<int> randomSample(vector<int>& nums, int k) {
vector<int> sample;
uniform_int_distribution<int> dist(0, nums.size()-1);
for (int i = 0; i < k; i++) {
sample.push_back(nums[dist(rng)]);
}
return sample;
}
int estimateRank(vector<int>& nums, int target) {
int sampleSize = min(100, (int)nums.size());
int smallerCount = 0;
uniform_int_distribution<int> dist(0, nums.size()-1);
for (int i = 0; i < sampleSize; i++) {
if (nums[dist(rng)] < target) {
smallerCount++;
}
}
return (smallerCount * nums.size()) / sampleSize + 1;
}
};
Go
import (
"math"
"math/rand"
"sort"
"time"
)
func approximateMedian(nums []int) int {
n := len(nums)
if n <= 4 {
sort.Ints(nums)
return nums[n/2]
}
rand.Seed(time.Now().UnixNano())
sampleSize := int(math.Max(1, math.Sqrt(float64(n))))
sample := randomSample(nums, sampleSize)
sort.Ints(sample)
// Try candidates around the median of the sample
start := int(math.Max(0, float64(sampleSize/2-1)))
end := int(math.Min(float64(sampleSize-1), float64(sampleSize/2+1)))
for i := start; i <= end; i++ {
candidate := sample[i]
estimatedRank := estimateRank(nums, candidate)
if estimatedRank >= n/4 && estimatedRank <= 3*n/4 {
return candidate
}
}
// Fallback: return middle element of sample
return sample[sampleSize/2]
}
func randomSample(nums []int, k int) []int {
sample := make([]int, k)
for i := 0; i < k; i++ {
sample[i] = nums[rand.Intn(len(nums))]
}
return sample
}
func estimateRank(nums []int, target int) int {
sampleSize := int(math.Min(100, float64(len(nums))))
smallerCount := 0
for i := 0; i < sampleSize; i++ {
if nums[rand.Intn(len(nums))] < target {
smallerCount++
}
}
return (smallerCount*len(nums))/sampleSize + 1
}
Java
class Solution {
private Random rand = new Random();
public int approximateMedian(int[] nums) {
int n = nums.length;
if (n <= 4) {
Arrays.sort(nums);
return nums[n/2];
}
int sampleSize = Math.max(1, (int)Math.sqrt(n));
int[] sample = randomSample(nums, sampleSize);
Arrays.sort(sample);
// Try candidates around the median of the sample
int start = Math.max(0, sampleSize/2 - 1);
int end = Math.min(sampleSize-1, sampleSize/2 + 1);
for (int i = start; i <= end; i++) {
int candidate = sample[i];
int estimatedRank = estimateRank(nums, candidate);
if (estimatedRank >= n/4 && estimatedRank <= 3*n/4) {
return candidate;
}
}
// Fallback: return middle element of sample
return sample[sampleSize/2];
}
private int[] randomSample(int[] nums, int k) {
int[] sample = new int[k];
for (int i = 0; i < k; i++) {
sample[i] = nums[rand.nextInt(nums.length)];
}
return sample;
}
private int estimateRank(int[] nums, int target) {
int sampleSize = Math.min(100, nums.length);
int smallerCount = 0;
for (int i = 0; i < sampleSize; i++) {
if (nums[rand.nextInt(nums.length)] < target) {
smallerCount++;
}
}
return (smallerCount * nums.length) / sampleSize + 1;
}
}
Python
import random
import math
from typing import List
class Solution:
def approximate_median(self, nums: List[int]) -> int:
n = len(nums)
if n <= 4:
return sorted(nums)[n//2]
sample_size = max(1, int(math.sqrt(n)))
sample = self._random_sample(nums, sample_size)
sample.sort()
# Try candidates around the median of the sample
start = max(0, sample_size//2 - 1)
end = min(sample_size-1, sample_size//2 + 1)
for i in range(start, end + 1):
candidate = sample[i]
estimated_rank = self._estimate_rank(nums, candidate)
if n//4 <= estimated_rank <= 3*n//4:
return candidate
# Fallback: return middle element of sample
return sample[sample_size//2]
def _random_sample(self, nums: List[int], k: int) -> List[int]:
return [random.choice(nums) for _ in range(k)]
def _estimate_rank(self, nums: List[int], target: int) -> int:
sample_size = min(100, len(nums))
smaller_count = sum(1 for _ in range(sample_size)
if random.choice(nums) < target)
return (smaller_count * len(nums)) // sample_size + 1
Complexity
- ⏰ Time complexity:
O(√N log √N), where √N is the sample size and log √N for sorting the sample - 🧺 Space complexity:
O(√N), for storing the sample
Method 2 - Quickselect with Early Termination
Intuition
We can modify the quickselect algorithm to terminate early when we find an element within the acceptable rank range [N/4, 3N/4]. This gives us sub-linear expected time while maintaining good approximation guarantees.
Approach
- Use randomized quickselect but check if current pivot falls within acceptable range
- If pivot rank is within [N/4, 3N/4], return it immediately
- Otherwise, continue partitioning only the necessary side
- Add randomization and early termination for sub-linear performance
- Limit the depth of recursion to ensure sub-linear time
Code
C++
class Solution {
private:
mt19937 rng;
public:
Solution() : rng(random_device{}()) {}
int approximateMedianQuickselect(vector<int>& nums) {
int n = nums.size();
int maxDepth = (int)log2(n) + 1; // Limit recursion depth
return quickselectApprox(nums, 0, n-1, n/4, 3*n/4, 0, maxDepth);
}
private:
int quickselectApprox(vector<int>& nums, int left, int right,
int minRank, int maxRank, int depth, int maxDepth) {
if (depth >= maxDepth || right - left < 10) {
// Fallback: sort small range and return middle
sort(nums.begin() + left, nums.begin() + right + 1);
return nums[(left + right) / 2];
}
int pivotIndex = partition(nums, left, right);
int pivotRank = pivotIndex + 1; // 1-based rank
if (pivotRank >= minRank && pivotRank <= maxRank) {
return nums[pivotIndex];
}
if (pivotRank < minRank) {
return quickselectApprox(nums, pivotIndex + 1, right,
minRank, maxRank, depth + 1, maxDepth);
} else {
return quickselectApprox(nums, left, pivotIndex - 1,
minRank, maxRank, depth + 1, maxDepth);
}
}
int partition(vector<int>& nums, int left, int right) {
// Random pivot selection
uniform_int_distribution<int> dist(left, right);
int randomIndex = dist(rng);
swap(nums[randomIndex], nums[right]);
int pivot = nums[right];
int i = left - 1;
for (int j = left; j < right; j++) {
if (nums[j] <= pivot) {
i++;
swap(nums[i], nums[j]);
}
}
swap(nums[i + 1], nums[right]);
return i + 1;
}
};
Go
import (
"math"
"math/rand"
"sort"
"time"
)
func approximateMedianQuickselect(nums []int) int {
rand.Seed(time.Now().UnixNano())
n := len(nums)
maxDepth := int(math.Log2(float64(n))) + 1
return quickselectApprox(nums, 0, n-1, n/4, 3*n/4, 0, maxDepth)
}
func quickselectApprox(nums []int, left, right, minRank, maxRank, depth, maxDepth int) int {
if depth >= maxDepth || right-left < 10 {
// Fallback: sort small range and return middle
subSlice := nums[left : right+1]
sort.Ints(subSlice)
copy(nums[left:], subSlice)
return nums[(left+right)/2]
}
pivotIndex := partition(nums, left, right)
pivotRank := pivotIndex + 1 // 1-based rank
if pivotRank >= minRank && pivotRank <= maxRank {
return nums[pivotIndex]
}
if pivotRank < minRank {
return quickselectApprox(nums, pivotIndex+1, right, minRank, maxRank, depth+1, maxDepth)
} else {
return quickselectApprox(nums, left, pivotIndex-1, minRank, maxRank, depth+1, maxDepth)
}
}
func partition(nums []int, left, right int) int {
// Random pivot selection
randomIndex := left + rand.Intn(right-left+1)
nums[randomIndex], nums[right] = nums[right], nums[randomIndex]
pivot := nums[right]
i := left - 1
for j := left; j < right; j++ {
if nums[j] <= pivot {
i++
nums[i], nums[j] = nums[j], nums[i]
}
}
nums[i+1], nums[right] = nums[right], nums[i+1]
return i + 1
}
Java
class Solution {
private Random rand = new Random();
public int approximateMedianQuickselect(int[] nums) {
int n = nums.length;
int maxDepth = (int)(Math.log(n) / Math.log(2)) + 1;
return quickselectApprox(nums, 0, n-1, n/4, 3*n/4, 0, maxDepth);
}
private int quickselectApprox(int[] nums, int left, int right,
int minRank, int maxRank, int depth, int maxDepth) {
if (depth >= maxDepth || right - left < 10) {
// Fallback: sort small range and return middle
Arrays.sort(nums, left, right + 1);
return nums[(left + right) / 2];
}
int pivotIndex = partition(nums, left, right);
int pivotRank = pivotIndex + 1; // 1-based rank
if (pivotRank >= minRank && pivotRank <= maxRank) {
return nums[pivotIndex];
}
if (pivotRank < minRank) {
return quickselectApprox(nums, pivotIndex + 1, right,
minRank, maxRank, depth + 1, maxDepth);
} else {
return quickselectApprox(nums, left, pivotIndex - 1,
minRank, maxRank, depth + 1, maxDepth);
}
}
private int partition(int[] nums, int left, int right) {
// Random pivot selection
int randomIndex = left + rand.nextInt(right - left + 1);
swap(nums, randomIndex, right);
int pivot = nums[right];
int i = left - 1;
for (int j = left; j < right; j++) {
if (nums[j] <= pivot) {
i++;
swap(nums, i, j);
}
}
swap(nums, i + 1, right);
return i + 1;
}
private void swap(int[] nums, int i, int j) {
int temp = nums[i];
nums[i] = nums[j];
nums[j] = temp;
}
}
Python
import random
import math
from typing import List
class Solution:
def approximate_median_quickselect(self, nums: List[int]) -> int:
n = len(nums)
max_depth = int(math.log2(n)) + 1
return self._quickselect_approx(nums, 0, n-1, n//4, 3*n//4, 0, max_depth)
def _quickselect_approx(self, nums: List[int], left: int, right: int,
min_rank: int, max_rank: int, depth: int, max_depth: int) -> int:
if depth >= max_depth or right - left < 10:
# Fallback: sort small range and return middle
nums[left:right+1] = sorted(nums[left:right+1])
return nums[(left + right) // 2]
pivot_index = self._partition(nums, left, right)
pivot_rank = pivot_index + 1 # 1-based rank
if min_rank <= pivot_rank <= max_rank:
return nums[pivot_index]
if pivot_rank < min_rank:
return self._quickselect_approx(nums, pivot_index + 1, right,
min_rank, max_rank, depth + 1, max_depth)
else:
return self._quickselect_approx(nums, left, pivot_index - 1,
min_rank, max_rank, depth + 1, max_depth)
def _partition(self, nums: List[int], left: int, right: int) -> int:
# Random pivot selection
random_index = random.randint(left, right)
nums[random_index], nums[right] = nums[right], nums[random_index]
pivot = nums[right]
i = left - 1
for j in range(left, right):
if nums[j] <= pivot:
i += 1
nums[i], nums[j] = nums[j], nums[i]
nums[i + 1], nums[right] = nums[right], nums[i + 1]
return i + 1
Complexity
- ⏰ Time complexity:
O(N)expected,O(log N)with high probability due to depth limiting - 🧺 Space complexity:
O(log N), for recursion stack
Notes
- This problem requires finding an approximate median, not the exact median like in [Calculating the mean and median of an unsorted array](calculating-the-mean-and-median-of-an-unsorted-array)
- The rank constraint allows for a range of acceptable answers, making sub-linear solutions possible
- Method 1 uses statistical sampling which is more predictable but less precise
- Method 2 uses modified quickselect which can be faster but has probabilistic guarantees
- Both methods achieve sub-linear time complexity while maintaining high confidence in the result
- For small arrays (N ≤ 4), we fall back to exact sorting since the overhead isn't worth it