By Fatskills Exam Guides Team — the exam nerds behind 28,500+ quizzes and 2.1M practice questions across 500+ global exams.
(Complete Guide for FAANG-Level Interviews)
"This problem appears in 1 out of every 3 onsite interviews—master it, and you prove you can design real-time systems while optimizing for time and space complexity under pressure."
Before tackling this problem, ensure you understand: 1. Heaps (Priority Queues) – Min-heap and max-heap properties, insertion/deletion in O(log N). 2. Two-Pointer Technique – Balancing two structures to maintain invariants. 3. Amortized Analysis – Why O(log N) per operation is acceptable for real-time systems.
len(max_heap) == len(min_heap) ± 1
heapq.heapify(arr)
(Repeatable mental checklist for every "Median from Data Stream" problem.)
Are there duplicates? (Assume yes unless told otherwise.)
Choose Data Structures
Optional: Hash map for lazy removal (if follow-up involves deletions).
Define Invariants
len(max_heap) == len(min_heap)
len(max_heap) == len(min_heap) + 1
All elements in max_heap ≤ all elements in min_heap.
max_heap
min_heap
Insertion Logic
Ensure size invariant holds (adjust heaps if off by > 1).
Median Access
median = (max_heap[0] + min_heap[0]) / 2
If unequal: median = max_heap[0].
median = max_heap[0]
Edge Cases
None
Duplicates → Heaps handle them naturally.
Optimize for Follow-Ups
Problem: Design a class to find the median of a continuously growing stream of integers.
import heapq class MedianFinder: def __init__(self): self.max_heap = [] # lower half (max-heap, stored as negatives) self.min_heap = [] # upper half (min-heap) def addNum(self, num: int) -> None: # Push to max_heap first heapq.heappush(self.max_heap, -num) # Move largest from max_heap to min_heap heapq.heappush(self.min_heap, -heapq.heappop(self.max_heap)) # Rebalance if min_heap is larger if len(self.min_heap) > len(self.max_heap): heapq.heappush(self.max_heap, -heapq.heappop(self.min_heap)) def findMedian(self) -> float: if len(self.max_heap) > len(self.min_heap): return -self.max_heap[0] return (-self.max_heap[0] + self.min_heap[0]) / 2
O(log N)
O(1)
O(N)
Problem: Extend the class to support removing numbers from the stream.
import heapq class MedianFinderWithDeletion: def __init__(self): self.max_heap = [] self.min_heap = [] self.active = {} # tracks active elements self.delayed = {} # tracks stale elements def _clean_heap(self, heap, is_max_heap=False): while heap: num = -heap[0] if is_max_heap else heap[0] if num in self.delayed: self.delayed[num] -= 1 if self.delayed[num] == 0: del self.delayed[num] heapq.heappop(heap) else: break def addNum(self, num: int) -> None: if num in self.active: self.active[num] += 1 else: self.active[num] = 1 heapq.heappush(self.max_heap, -num) heapq.heappush(self.min_heap, -heapq.heappop(self.max_heap)) if len(self.min_heap) > len(self.max_heap): heapq.heappush(self.max_heap, -heapq.heappop(self.min_heap)) def removeNum(self, num: int) -> None: if num not in self.active or self.active[num] == 0: return self.active[num] -= 1 if self.active[num] == 0: del self.active[num] self.delayed[num] = self.delayed.get(num, 0) + 1 self._clean_heap(self.max_heap, is_max_heap=True) self._clean_heap(self.min_heap) def findMedian(self) -> float: self._clean_heap(self.max_heap, is_max_heap=True) self._clean_heap(self.min_heap) if len(self.max_heap) > len(self.min_heap): return -self.max_heap[0] return (-self.max_heap[0] + self.min_heap[0]) / 2
Problem: Find the median of the last k elements in a stream.
k
On eviction, remove the expired element from the appropriate heap.
Balancing:
import heapq from collections import deque class SlidingWindowMedian: def __init__(self, k: int): self.k = k self.window = deque() self.max_heap = [] # lower half self.min_heap = [] # upper half def addNum(self, num: int) -> None: self.window.append(num) if len(self.window) > self.k: self._remove(self.window.popleft()) self._add(num) self._balance() def _add(self, num: int) -> None: if not self.max_heap or num <= -self.max_heap[0]: heapq.heappush(self.max_heap, -num) else: heapq.heappush(self.min_heap, num) def _remove(self, num: int) -> None: if num <= -self.max_heap[0]: # Lazy removal: mark as stale (not implemented here for brevity) pass # In practice, use a hash map like Example 2 else: pass def _balance(self) -> None: while len(self.max_heap) > len(self.min_heap) + 1: heapq.heappush(self.min_heap, -heapq.heappop(self.max_heap)) while len(self.min_heap) > len(self.max_heap): heapq.heappush(self.max_heap, -heapq.heappop(self.min_heap)) def findMedian(self) -> float: if len(self.window) < self.k: return -1 # or raise error if len(self.max_heap) == len(self.min_heap): return (-self.max_heap[0] + self.min_heap[0]) / 2 return -self.max_heap[0]
O(k)
"Alright, let’s lock this in. Here’s your 30-second cheat sheet for ‘Median from Data Stream’:
You’ve got this. Walk in, write the heaps, balance them, and own the median in O(log N) time. Now go crush that interview!" ?
Join 4M+ learners. Unlock unlimited quizzes, wrong-answer tracking, flashcards + reminders, study guides, and 1-on-1 challenges.